[
  {
    "path": ".ipynb_checkpoints/Untitled-checkpoint.ipynb",
    "content": "{\n \"cells\": [],\n \"metadata\": {},\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "BERT_SUMM.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"name\": \"BERT SUMM.ipynb\",\n      \"version\": \"0.3.2\",\n      \"provenance\": [],\n      \"include_colab_link\": true\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    },\n    \"accelerator\": \"GPU\"\n  },\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"view-in-github\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"<a href=\\\"https://colab.research.google.com/github/santhoshkolloju/bert_summ/blob/master/BERT_SUMM.ipynb\\\" target=\\\"_parent\\\"><img src=\\\"https://colab.research.google.com/assets/colab-badge.svg\\\" alt=\\\"Open In Colab\\\"/></a>\"\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"pbgEu1oyhPca\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"1ecf3d0e-75c4-4f0d-8a64-18fc6bcfb4a1\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"import tensorflow as tf\\n\",\n        \"device_name = tf.test.gpu_device_name()\\n\",\n        \"if device_name != '/device:GPU:0':\\n\",\n        \"  raise SystemError('GPU device not found')\\n\",\n        \"print('Found GPU at: {}'.format(device_name))\"\n      ],\n      \"execution_count\": 1,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Found GPU at: /device:GPU:0\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"UqeTD_eGkxVd\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"from google.colab import auth\\n\",\n        \"auth.authenticate_user()\"\n      ],\n      
\"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"SFCZikuuibh7\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"f228c486-ff0b-4740-c9c2-10a2d9b893ba\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 267\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"import sys\\n\",\n        \"\\n\",\n        \"!test -d texar_repo || git clone https://github.com/asyml/texar.git texar_repo\\n\",\n        \"if not 'texar_repo' in sys.path:\\n\",\n        \"  sys.path += ['texar_repo']\\n\",\n        \"!pip install funcsigs\"\n      ],\n      \"execution_count\": 3,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Cloning into 'texar_repo'...\\n\",\n            \"remote: Enumerating objects: 10204, done.\\u001b[K\\n\",\n            \"remote: Total 10204 (delta 0), reused 0 (delta 0), pack-reused 10204\\u001b[K\\n\",\n            \"Receiving objects: 100% (10204/10204), 2.30 MiB | 12.21 MiB/s, done.\\n\",\n            \"Resolving deltas: 100% (7774/7774), done.\\n\",\n            \"Collecting funcsigs\\n\",\n            \"  Downloading https://files.pythonhosted.org/packages/69/cb/f5be453359271714c01b9bd06126eaf2e368f1fddfff30818754b5ac2328/funcsigs-1.0.2-py2.py3-none-any.whl\\n\",\n            \"Installing collected packages: funcsigs\\n\",\n            \"Successfully installed funcsigs-1.0.2\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"_IB17muzsBd0\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"34a14a15-84b6-47d0-d286-2ae052c858d0\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 74\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"\\\"\\\"\\\"\\n\",\n        \"from 
google_drive_downloader import GoogleDriveDownloader as gdd\\n\",\n        \"\\n\",\n        \"gdd.download_file_from_google_drive(file_id='0BwmD_VLjROrfTHk4NFg2SndKcjQ',\\n\",\n        \"                                    dest_path='./cnn_stories.tgz',\\n\",\n        \"                                    unzip=True)\\n\",\n        \"!tar -zxf cnn_stories.tgz\\n\",\n        \"\\n\",\n        \"from os import listdir\\n\",\n        \"import string\\n\",\n        \"\\n\",\n        \"# load doc into memory\\n\",\n        \"def load_doc(filename):\\n\",\n        \"\\t# open the file as read only\\n\",\n        \"\\tfile = open(filename, encoding='utf-8')\\n\",\n        \"\\t# read all text\\n\",\n        \"\\ttext = file.read()\\n\",\n        \"\\t# close the file\\n\",\n        \"\\tfile.close()\\n\",\n        \"\\treturn text\\n\",\n        \"\\n\",\n        \"# split a document into news story and highlights\\n\",\n        \"def split_story(doc):\\n\",\n        \"\\t# find first highlight\\n\",\n        \"\\tindex = doc.find('@highlight')\\n\",\n        \"\\t# split into story and highlights\\n\",\n        \"\\tstory, highlights = doc[:index], doc[index:].split('@highlight')\\n\",\n        \"\\t# strip extra white space around each highlight\\n\",\n        \"\\thighlights = [h.strip() for h in highlights if len(h) > 0]\\n\",\n        \"\\treturn story, highlights\\n\",\n        \"\\n\",\n        \"# load all stories in a directory\\n\",\n        \"def load_stories(directory):\\n\",\n        \"\\tstories = list()\\n\",\n        \"\\tfor name in listdir(directory):\\n\",\n        \"\\t\\tfilename = directory + '/' + name\\n\",\n        \"\\t\\t# load document\\n\",\n        \"\\t\\tdoc = load_doc(filename)\\n\",\n        \"\\t\\t# split into story and highlights\\n\",\n        \"\\t\\tstory, highlights = split_story(doc)\\n\",\n        \"\\t\\t# store\\n\",\n        \"\\t\\tstories.append({'story':story, 'highlights':highlights})\\n\",\n        \"\\treturn 
stories\\n\",\n        \"\\n\",\n        \"# clean a list of lines\\n\",\n        \"def clean_lines(lines):\\n\",\n        \"\\tcleaned = list()\\n\",\n        \"\\t# prepare a translation table to remove punctuation\\n\",\n        \"\\ttable = str.maketrans('', '', string.punctuation)\\n\",\n        \"\\tfor line in lines:\\n\",\n        \"\\t\\t# strip source cnn office if it exists\\n\",\n        \"\\t\\tindex = line.find('(CNN) -- ')\\n\",\n        \"\\t\\tif index > -1:\\n\",\n        \"\\t\\t\\tline = line[index+len('(CNN)'):]\\n\",\n        \"\\t\\t# tokenize on white space\\n\",\n        \"\\t\\tline = line.split()\\n\",\n        \"\\t\\t# convert to lower case\\n\",\n        \"\\t\\tline = [word.lower() for word in line]\\n\",\n        \"\\t\\t# remove punctuation from each token\\n\",\n        \"\\t\\tline = [w.translate(table) for w in line]\\n\",\n        \"\\t\\t# remove tokens with numbers in them\\n\",\n        \"\\t\\tline = [word for word in line if word.isalpha()]\\n\",\n        \"\\t\\t# store as string\\n\",\n        \"\\t\\tcleaned.append(' '.join(line))\\n\",\n        \"\\t# remove empty strings\\n\",\n        \"\\tcleaned = [c for c in cleaned if len(c) > 0]\\n\",\n        \"\\treturn cleaned\\n\",\n        \"\\n\",\n        \"# load stories\\n\",\n        \"directory = 'cnn/stories/'\\n\",\n        \"stories = load_stories(directory)\\n\",\n        \"print('Loaded Stories %d' % len(stories))\\n\",\n        \"\\n\",\n        \"# clean stories\\n\",\n        \"f1 = open(\\\"stories.txt\\\",'w')\\n\",\n        \"f2 = open(\\\"summary.txt\\\",'w')\\n\",\n        \"for example in stories:\\n\",\n        \"  example['story'] = clean_lines(example['story'].split('\\\\n'))\\n\",\n        \"  example['highlights'] = clean_lines(example['highlights'])\\n\",\n        \"  f1.write(\\\" \\\".join(example['story']))\\n\",\n        \"  f1.write(\\\"\\\\n\\\")\\n\",\n        \"  f2.write(\\\" \\\".join(example['highlights']))\\n\",\n        \"  
f2.write(\\\"\\\\n\\\")\\n\",\n        \"f1.close()\\n\",\n        \"f2.close()\\n\",\n        \"  \\n\",\n        \"story = open(\\\"stories.txt\\\").readlines()\\n\",\n        \"summ = open(\\\"summary.txt\\\").readlines() \\n\",\n        \"train_story = story[0:90000]\\n\",\n        \"train_summ = summ[0:90000]\\n\",\n        \"\\n\",\n        \"eval_story = story[90000:91579]\\n\",\n        \"eval_summ = summ[90000:91579]\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"test_story = story[91579:92579]\\n\",\n        \"test_summ = summ[91579:92579]\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"with open(\\\"train_story.txt\\\",'w') as f:\\n\",\n        \"  f.write(\\\"\\\\n\\\".join(train_story))\\n\",\n        \"  \\n\",\n        \"with open(\\\"train_summ.txt\\\",'w') as f:\\n\",\n        \"  f.write(\\\"\\\\n\\\".join(train_summ))\\n\",\n        \"  \\n\",\n        \"with open(\\\"eval_story.txt\\\",'w') as f:\\n\",\n        \"  f.write(\\\"\\\\n\\\".join(eval_story))\\n\",\n        \"  \\n\",\n        \"  \\n\",\n        \"with open(\\\"eval_summ.txt\\\",'w') as f:\\n\",\n        \"  f.write(\\\"\\\\n\\\".join(eval_summ))\\n\",\n        \"  \\n\",\n        \"  \\n\",\n        \"with open(\\\"test_story.txt\\\",'w') as f:\\n\",\n        \"  f.write(\\\"\\\\n\\\".join(test_story))\\n\",\n        \"  \\n\",\n        \"  \\n\",\n        \"with open(\\\"test_summ.txt\\\",'w') as f:\\n\",\n        \"  f.write(\\\"\\\\n\\\".join(test_summ))  \\n\",\n        \"  \\\"\\\"\\\"\"\n      ],\n      \"execution_count\": 4,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"'\\\\nfrom google_drive_downloader import GoogleDriveDownloader as gdd\\\\n\\\\ngdd.download_file_from_google_drive(file_id=\\\\'0BwmD_VLjROrfTHk4NFg2SndKcjQ\\\\',\\\\n                                    dest_path=\\\\'./cnn_stories.tgz\\\\',\\\\n                                    
unzip=True)\\\\n!tar -zxf cnn_stories.tgz\\\\n\\\\nfrom os import listdir\\\\nimport string\\\\n\\\\n# load doc into memory\\\\ndef load_doc(filename):\\\\n\\\\t# open the file as read only\\\\n\\\\tfile = open(filename, encoding=\\\\'utf-8\\\\')\\\\n\\\\t# read all text\\\\n\\\\ttext = file.read()\\\\n\\\\t# close the file\\\\n\\\\tfile.close()\\\\n\\\\treturn text\\\\n\\\\n# split a document into news story and highlights\\\\ndef split_story(doc):\\\\n\\\\t# find first highlight\\\\n\\\\tindex = doc.find(\\\\'@highlight\\\\')\\\\n\\\\t# split into story and highlights\\\\n\\\\tstory, highlights = doc[:index], doc[index:].split(\\\\'@highlight\\\\')\\\\n\\\\t# strip extra white space around each highlight\\\\n\\\\thighlights = [h.strip() for h in highlights if len(h) > 0]\\\\n\\\\treturn story, highlights\\\\n\\\\n# load all stories in a directory\\\\ndef load_stories(directory):\\\\n\\\\tstories = list()\\\\n\\\\tfor name in listdir(directory):\\\\n\\\\t\\\\tfilename = directory + \\\\'/\\\\' + name\\\\n\\\\t\\\\t# load document\\\\n\\\\t\\\\tdoc = load_doc(filename)\\\\n\\\\t\\\\t# split into story and highlights\\\\n\\\\t\\\\tstory, highlights = split_story(doc)\\\\n\\\\t\\\\t# store\\\\n\\\\t\\\\tstories.append({\\\\'story\\\\':story, \\\\'highlights\\\\':highlights})\\\\n\\\\treturn stories\\\\n\\\\n# clean a list of lines\\\\ndef clean_lines(lines):\\\\n\\\\tcleaned = list()\\\\n\\\\t# prepare a translation table to remove punctuation\\\\n\\\\ttable = str.maketrans(\\\\'\\\\', \\\\'\\\\', string.punctuation)\\\\n\\\\tfor line in lines:\\\\n\\\\t\\\\t# strip source cnn office if it exists\\\\n\\\\t\\\\tindex = line.find(\\\\'(CNN) -- \\\\')\\\\n\\\\t\\\\tif index > -1:\\\\n\\\\t\\\\t\\\\tline = line[index+len(\\\\'(CNN)\\\\'):]\\\\n\\\\t\\\\t# tokenize on white space\\\\n\\\\t\\\\tline = line.split()\\\\n\\\\t\\\\t# convert to lower case\\\\n\\\\t\\\\tline = [word.lower() for word in line]\\\\n\\\\t\\\\t# remove punctuation from each token\\\\n\\\\t\\\\tline 
= [w.translate(table) for w in line]\\\\n\\\\t\\\\t# remove tokens with numbers in them\\\\n\\\\t\\\\tline = [word for word in line if word.isalpha()]\\\\n\\\\t\\\\t# store as string\\\\n\\\\t\\\\tcleaned.append(\\\\' \\\\'.join(line))\\\\n\\\\t# remove empty strings\\\\n\\\\tcleaned = [c for c in cleaned if len(c) > 0]\\\\n\\\\treturn cleaned\\\\n\\\\n# load stories\\\\ndirectory = \\\\'cnn/stories/\\\\'\\\\nstories = load_stories(directory)\\\\nprint(\\\\'Loaded Stories %d\\\\' % len(stories))\\\\n\\\\n# clean stories\\\\nf1 = open(\\\"stories.txt\\\",\\\\'w\\\\')\\\\nf2 = open(\\\"summary.txt\\\",\\\\'w\\\\')\\\\nfor example in stories:\\\\n  example[\\\\'story\\\\'] = clean_lines(example[\\\\'story\\\\'].split(\\\\'\\\\n\\\\'))\\\\n  example[\\\\'highlights\\\\'] = clean_lines(example[\\\\'highlights\\\\'])\\\\n  f1.write(\\\" \\\".join(example[\\\\'story\\\\']))\\\\n  f1.write(\\\"\\\\n\\\")\\\\n  f2.write(\\\" \\\".join(example[\\\\'highlights\\\\']))\\\\n  f2.write(\\\"\\\\n\\\")\\\\nf1.close()\\\\nf2.close()\\\\n  \\\\nstory = open(\\\"stories.txt\\\").readlines()\\\\nsumm = open(\\\"summary.txt\\\").readlines() \\\\ntrain_story = story[0:90000]\\\\ntrain_summ = summ[0:90000]\\\\n\\\\neval_story = story[90000:91579]\\\\neval_summ = summ[90000:91579]\\\\n\\\\n\\\\ntest_story = story[91579:92579]\\\\ntest_summ = summ[91579:92579]\\\\n\\\\n\\\\nwith open(\\\"train_story.txt\\\",\\\\'w\\\\') as f:\\\\n  f.write(\\\"\\\\n\\\".join(train_story))\\\\n  \\\\nwith open(\\\"train_summ.txt\\\",\\\\'w\\\\') as f:\\\\n  f.write(\\\"\\\\n\\\".join(train_summ))\\\\n  \\\\nwith open(\\\"eval_story.txt\\\",\\\\'w\\\\') as f:\\\\n  f.write(\\\"\\\\n\\\".join(eval_story))\\\\n  \\\\n  \\\\nwith open(\\\"eval_summ.txt\\\",\\\\'w\\\\') as f:\\\\n  f.write(\\\"\\\\n\\\".join(eval_summ))\\\\n  \\\\n  \\\\nwith open(\\\"test_story.txt\\\",\\\\'w\\\\') as f:\\\\n  f.write(\\\"\\\\n\\\".join(test_story))\\\\n  \\\\n  \\\\nwith open(\\\"test_summ.txt\\\",\\\\'w\\\\') as f:\\\\n  
f.write(\\\"\\\\n\\\".join(test_summ))  \\\\n  '\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 4\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"SsWJmIfmij-_\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"import os\\n\",\n        \"import csv\\n\",\n        \"import collections\\n\",\n        \"import sys\\n\",\n        \"from texar_repo.examples.bert.utils import data_utils, model_utils, tokenization\\n\",\n        \"import importlib\\n\",\n        \"import tensorflow as tf\\n\",\n        \"import texar as tx \\n\",\n        \"from texar_repo.examples.bert import config_classifier as config_downstream\\n\",\n        \"from texar_repo.texar.utils import transformer_utils\\n\",\n        \"from texar_repo.examples.transformer.utils import data_utils, utils\\n\",\n        \"from texar_repo.examples.transformer.bleu_tool import bleu_wrapper\\n\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"Ko2McfcdhbcN\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"#config\\n\",\n        \"\\n\",\n        \"dcoder_config = {\\n\",\n        \"    'dim': 768,\\n\",\n        \"    'num_blocks': 6,\\n\",\n        \"    'multihead_attention': {\\n\",\n        \"        'num_heads': 8,\\n\",\n        \"        'output_dim': 768\\n\",\n        \"        # See documentation for more optional hyperparameters\\n\",\n        \"    },\\n\",\n        \"    'position_embedder_hparams': {\\n\",\n        \"        'dim': 768\\n\",\n        \"    },\\n\",\n        \"    'initializer': {\\n\",\n        \"        'type': 'variance_scaling_initializer',\\n\",\n        \"        'kwargs': {\\n\",\n        \"            'scale': 1.0,\\n\",\n        
\"            'mode': 'fan_avg',\\n\",\n        \"            'distribution': 'uniform',\\n\",\n        \"        },\\n\",\n        \"    },\\n\",\n        \"    'poswise_feedforward': tx.modules.default_transformer_poswise_net_hparams(\\n\",\n        \"        output_dim=768)\\n\",\n        \"}\\n\",\n        \"\\n\",\n        \"loss_label_confidence = 0.9\\n\",\n        \"\\n\",\n        \"random_seed = 1234\\n\",\n        \"beam_width = 5\\n\",\n        \"alpha = 0.6\\n\",\n        \"hidden_dim = 768\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"opt = {\\n\",\n        \"    'optimizer': {\\n\",\n        \"        'type': 'AdamOptimizer',\\n\",\n        \"        'kwargs': {\\n\",\n        \"            'beta1': 0.9,\\n\",\n        \"            'beta2': 0.997,\\n\",\n        \"            'epsilon': 1e-9\\n\",\n        \"        }\\n\",\n        \"    }\\n\",\n        \"}\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"lr = {\\n\",\n        \"    'learning_rate_schedule': 'constant.linear_warmup.rsqrt_decay.rsqrt_depth',\\n\",\n        \"    'lr_constant': 2 * (hidden_dim ** -0.5),\\n\",\n        \"    'static_lr': 1e-3,\\n\",\n        \"    'warmup_steps': 2000,\\n\",\n        \"}\\n\",\n        \"\\n\",\n        \"bos_token_id =101\\n\",\n        \"eos_token_id = 102\\n\",\n        \"\\n\",\n        \"model_dir= \\\"./models\\\"\\n\",\n        \"run_mode= \\\"train_and_evaluate\\\"\\n\",\n        \"batch_size = 32\\n\",\n        \"test_batch_size = 32\\n\",\n        \"\\n\",\n        \"max_train_epoch = 20\\n\",\n        \"display_steps = 100\\n\",\n        \"eval_steps = 100000\\n\",\n        \"\\n\",\n        \"max_decoding_length = 400\\n\",\n        \"\\n\",\n        \"max_seq_length_src = 512\\n\",\n        \"max_seq_length_tgt = 400\\n\",\n        \"\\n\",\n        \"bert_pretrain_dir = 'bert_pretrained_models/uncased_L-12_H-768_A-12'\\n\",\n        \"#config\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n   
   \"metadata\": {\n        \"id\": \"MrBw61rEiXeE\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"!mkdir bert_pretrained_models\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"FH2RX773i40g\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"3e61c01b-e31b-485b-89e0-b51fddc341d4\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 354\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"!wget https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip -P bert_pretrained_models/;\\n\",\n        \"!unzip bert_pretrained_models/uncased_L-12_H-768_A-12.zip -d bert_pretrained_models/\\n\"\n      ],\n      \"execution_count\": 11,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"--2019-01-31 01:48:29--  https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip\\n\",\n            \"Resolving storage.googleapis.com (storage.googleapis.com)... 64.233.181.128, 2607:f8b0:4001:c0f::80\\n\",\n            \"Connecting to storage.googleapis.com (storage.googleapis.com)|64.233.181.128|:443... connected.\\n\",\n            \"HTTP request sent, awaiting response... 
200 OK\\n\",\n            \"Length: 407727028 (389M) [application/zip]\\n\",\n            \"Saving to: ‘bert_pretrained_models/uncased_L-12_H-768_A-12.zip’\\n\",\n            \"\\n\",\n            \"uncased_L-12_H-768_ 100%[===================>] 388.84M   136MB/s    in 2.9s    \\n\",\n            \"\\n\",\n            \"2019-01-31 01:48:32 (136 MB/s) - ‘bert_pretrained_models/uncased_L-12_H-768_A-12.zip’ saved [407727028/407727028]\\n\",\n            \"\\n\",\n            \"Archive:  bert_pretrained_models/uncased_L-12_H-768_A-12.zip\\n\",\n            \"   creating: bert_pretrained_models/uncased_L-12_H-768_A-12/\\n\",\n            \"  inflating: bert_pretrained_models/uncased_L-12_H-768_A-12/bert_model.ckpt.meta  \\n\",\n            \"  inflating: bert_pretrained_models/uncased_L-12_H-768_A-12/bert_model.ckpt.data-00000-of-00001  \\n\",\n            \"  inflating: bert_pretrained_models/uncased_L-12_H-768_A-12/vocab.txt  \\n\",\n            \"  inflating: bert_pretrained_models/uncased_L-12_H-768_A-12/bert_model.ckpt.index  \\n\",\n            \"  inflating: bert_pretrained_models/uncased_L-12_H-768_A-12/bert_config.json  \\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"JC60vOxGjI-M\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"class InputExample():\\n\",\n        \"    \\\"\\\"\\\"A single training/test example for simple sequence classification.\\\"\\\"\\\"\\n\",\n        \"\\n\",\n        \"    def __init__(self, guid, text_a, text_b=None):\\n\",\n        \"        \\\"\\\"\\\"Constructs a InputExample.\\n\",\n        \"        Args:\\n\",\n        \"            guid: Unique id for the example.\\n\",\n        \"            text_a: string. 
The untokenized text of the first sequence.\\n\",\n        \"                For single sequence tasks, only this sequence must be specified.\\n\",\n        \"            text_b: (Optional) string. The untokenized text of the second\\n\",\n        \"                sequence. Only must be specified for sequence pair tasks.\\n\",\n        \"            label: (Optional) string. The label of the example. This should be\\n\",\n        \"                specified for train and dev examples, but not for test examples.\\n\",\n        \"        \\\"\\\"\\\"\\n\",\n        \"        self.guid = guid\\n\",\n        \"        self.src_txt = text_a\\n\",\n        \"        self.tgt_txt = text_b\\n\",\n        \"        \\n\",\n        \"class InputFeatures():\\n\",\n        \"    \\\"\\\"\\\"A single set of features of data.\\\"\\\"\\\"\\n\",\n        \"\\n\",\n        \"    def __init__(self, src_input_ids,src_input_mask,src_segment_ids,tgt_input_ids,tgt_input_mask,tgt_labels):\\n\",\n        \"        self.src_input_ids = src_input_ids\\n\",\n        \"        self.src_input_mask = src_input_mask\\n\",\n        \"        self.src_segment_ids = src_segment_ids\\n\",\n        \"        self.tgt_input_ids = tgt_input_ids\\n\",\n        \"        self.tgt_input_mask = tgt_input_mask \\n\",\n        \"        self.tgt_labels = tgt_labels\\n\",\n        \"        \\n\",\n        \"       \\n\",\n        \"class DataProcessor(object):\\n\",\n        \"    \\\"\\\"\\\"Base class for data converters for sequence classification data sets.\\\"\\\"\\\"\\n\",\n        \"\\n\",\n        \"    def get_train_examples(self, data_dir):\\n\",\n        \"        \\\"\\\"\\\"Gets a collection of `InputExample`s for the train set.\\\"\\\"\\\"\\n\",\n        \"        raise NotImplementedError()\\n\",\n        \"\\n\",\n        \"    def get_dev_examples(self, data_dir):\\n\",\n        \"        \\\"\\\"\\\"Gets a collection of `InputExample`s for the dev set.\\\"\\\"\\\"\\n\",\n        \"        
raise NotImplementedError()\\n\",\n        \"\\n\",\n        \"    def get_test_examples(self, data_dir):\\n\",\n        \"        \\\"\\\"\\\"Gets a collection of `InputExample`s for prediction.\\\"\\\"\\\"\\n\",\n        \"        raise NotImplementedError()\\n\",\n        \"\\n\",\n        \"    def get_labels(self):\\n\",\n        \"        \\\"\\\"\\\"Gets the list of labels for this data set.\\\"\\\"\\\"\\n\",\n        \"        raise NotImplementedError()\\n\",\n        \"\\n\",\n        \"    @classmethod\\n\",\n        \"    def _read_tsv(cls, input_file, quotechar=None):\\n\",\n        \"        \\\"\\\"\\\"Reads a tab separated value file.\\\"\\\"\\\"\\n\",\n        \"        with tf.gfile.Open(input_file, \\\"r\\\") as f:\\n\",\n        \"            reader = csv.reader(f, delimiter=\\\"\\\\t\\\", quotechar=quotechar)\\n\",\n        \"            lines = []\\n\",\n        \"            i = 0\\n\",\n        \"            for line in reader:\\n\",\n        \"                lines.append(line)\\n\",\n        \"        return lines\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"    @classmethod\\n\",\n        \"    def _read_file(cls, input_file, quotechar=None):\\n\",\n        \"        \\\"\\\"\\\"Reads a tab separated value file.\\\"\\\"\\\"\\n\",\n        \"        with tf.gfile.Open(input_file, \\\"r\\\") as f:\\n\",\n        \"            reader = csv.reader(f, delimiter=\\\"\\\\n\\\", quotechar=quotechar)\\n\",\n        \"            lines = []\\n\",\n        \"            i = 0\\n\",\n        \"            for line in reader:\\n\",\n        \"                lines.append(line)\\n\",\n        \"        return lines\\n\",\n        \"      \\n\",\n        \"      \\n\",\n        \"class CNNDailymail(DataProcessor):\\n\",\n        \"    \\\"\\\"\\\"Processor for the CoLA data set (GLUE version).\\\"\\\"\\\"\\n\",\n        \"\\n\",\n        \"    def get_train_examples(self, data_dir):\\n\",\n        \"        \\\"\\\"\\\"See base 
class.\\\"\\\"\\\"\\n\",\n        \"        return self._create_examples(\\n\",\n        \"            self._read_file(os.path.join(data_dir, \\\"train_story.txt\\\")),self._read_file(os.path.join(data_dir, \\\"train_summ.txt\\\")),\\n\",\n        \"            \\\"train\\\")\\n\",\n        \"\\n\",\n        \"    def get_dev_examples(self, data_dir):\\n\",\n        \"        \\\"\\\"\\\"See base class.\\\"\\\"\\\"\\n\",\n        \"        return self._create_examples(\\n\",\n        \"            self._read_file(os.path.join(data_dir, \\\"eval_story.txt\\\")),self._read_file(os.path.join(data_dir, \\\"eval_summ.txt\\\")),\\n\",\n        \"            \\\"dev\\\")\\n\",\n        \"\\n\",\n        \"    def get_test_examples(self, data_dir):\\n\",\n        \"        \\\"\\\"\\\"See base class.\\\"\\\"\\\"\\n\",\n        \"        return self._create_examples(\\n\",\n        \"            self._read_file(os.path.join(data_dir, \\\"test_story.txt\\\")),self._read_file(os.path.join(data_dir, \\\"test_summ.txt\\\")),\\n\",\n        \"            \\\"test\\\")\\n\",\n        \"\\n\",\n        \"    def _create_examples(self, src_lines,tgt_lines,set_type):\\n\",\n        \"        examples = [] \\n\",\n        \"        for i,data in enumerate(zip(src_lines,tgt_lines)):\\n\",\n        \"            guid = \\\"%s-%s\\\" % (set_type, i)\\n\",\n        \"            if set_type == \\\"test\\\" and i == 0:\\n\",\n        \"                continue\\n\",\n        \"            else:\\n\",\n        \"                #print(data)\\n\",\n        \"                if len(data[0])==0 or len(data[1])==0:\\n\",\n        \"                  continue\\n\",\n        \"                src_lines = tokenization.convert_to_unicode(data[0][0])\\n\",\n        \"                tgt_lines = tokenization.convert_to_unicode(data[1][0])\\n\",\n        \"                examples.append(InputExample(guid=guid, text_a=src_lines,\\n\",\n        \"                                         
text_b=tgt_lines))\\n\",\n        \"        return examples\\n\",\n        \"  \\n\",\n        \"  \\n\",\n        \"def file_based_convert_examples_to_features(\\n\",\n        \"        examples, max_seq_length_src,max_seq_length_tgt,tokenizer, output_file):\\n\",\n        \"    \\\"\\\"\\\"Convert a set of `InputExample`s to a TFRecord file.\\\"\\\"\\\"\\n\",\n        \"\\n\",\n        \"    writer = tf.python_io.TFRecordWriter(output_file)\\n\",\n        \"\\n\",\n        \"    for (ex_index, example) in enumerate(examples):\\n\",\n        \"        #print(\\\"ex_index\\\",ex_index)\\n\",\n        \"\\n\",\n        \"        if (ex_index+1) %1000 == 0 :\\n\",\n        \"          print(\\\"------------processed..{}...examples\\\".format(ex_index))\\n\",\n        \"          \\n\",\n        \"        feature = convert_single_example(ex_index, example,\\n\",\n        \"                                         max_seq_length_src,max_seq_length_tgt,tokenizer)\\n\",\n        \"\\n\",\n        \"        def create_int_feature(values):\\n\",\n        \"            return tf.train.Feature(\\n\",\n        \"                int64_list=tf.train.Int64List(value=list(values)))\\n\",\n        \"\\n\",\n        \"        features = collections.OrderedDict()\\n\",\n        \"        features[\\\"src_input_ids\\\"] = create_int_feature(feature.src_input_ids)\\n\",\n        \"        features[\\\"src_input_mask\\\"] = create_int_feature(feature.src_input_mask)\\n\",\n        \"        features[\\\"src_segment_ids\\\"] = create_int_feature(feature.src_segment_ids)\\n\",\n        \"\\n\",\n        \"        features[\\\"tgt_input_ids\\\"] = create_int_feature(feature.tgt_input_ids)\\n\",\n        \"        features[\\\"tgt_input_mask\\\"] = create_int_feature(feature.tgt_input_mask)\\n\",\n        \"        features['tgt_labels'] = create_int_feature(feature.tgt_labels)\\n\",\n        \"        \\n\",\n        \"        \\n\",\n        \"        \\n\",\n        \"        
#print(feature.tgt_labels)\\n\",\n        \"        \\n\",\n        \"\\n\",\n        \"        tf_example = tf.train.Example(\\n\",\n        \"            features=tf.train.Features(feature=features))\\n\",\n        \"        writer.write(tf_example.SerializeToString())\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def convert_single_example(ex_index, example, max_seq_length_src,max_seq_length_tgt,\\n\",\n        \"                           tokenizer):\\n\",\n        \"    \\\"\\\"\\\"Converts a single `InputExample` into a single `InputFeatures`.\\\"\\\"\\\"\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"    label_map = {}\\n\",\n        \"    for (i, label) in enumerate(label_list):\\n\",\n        \"        label_map[label] = i\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"    tokens_a = tokenizer.tokenize(example.src_txt)\\n\",\n        \"    tokens_b = tokenizer.tokenize(example.tgt_txt)\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"    # Modifies `tokens_a` and `tokens_b` in place so that the total\\n\",\n        \"    # length is less than the specified length.\\n\",\n        \"    # Account for [CLS], [SEP], [SEP] with \\\"- 3\\\"\\n\",\n        \"    if len(tokens_a) > max_seq_length_src - 2:\\n\",\n        \"            tokens_a = tokens_a[0:(max_seq_length_src - 2)]\\n\",\n        \"    \\n\",\n        \"    if len(tokens_b) > max_seq_length_tgt - 2:\\n\",\n        \"            tokens_b = tokens_b[0:(max_seq_length_tgt - 2)]\\n\",\n        \"\\n\",\n        \"    \\n\",\n        \"    tokens_src = []\\n\",\n        \"    segment_ids_src = []\\n\",\n        \"    tokens_src.append(\\\"[CLS]\\\")\\n\",\n        \"    segment_ids_src.append(0)\\n\",\n        \"    for token in tokens_a:\\n\",\n        \"        tokens_src.append(token)\\n\",\n        \"        segment_ids_src.append(0)\\n\",\n        \"    tokens_src.append(\\\"[SEP]\\\")\\n\",\n        \"    segment_ids_src.append(0)\\n\",\n        \"  \\n\",\n        \"\\n\",\n    
    \"    tokens_tgt = []\\n\",\n        \"    segment_ids_tgt = []\\n\",\n        \"    tokens_tgt.append(\\\"[CLS]\\\")\\n\",\n        \"    #segment_ids_tgt.append(0)\\n\",\n        \"    for token in tokens_b:\\n\",\n        \"        tokens_tgt.append(token)\\n\",\n        \"        #segment_ids_tgt.append(0)\\n\",\n        \"    tokens_tgt.append(\\\"[SEP]\\\")\\n\",\n        \"    #segment_ids_tgt.append(0)\\n\",\n        \"\\n\",\n        \"    input_ids_src = tokenizer.convert_tokens_to_ids(tokens_src)\\n\",\n        \"   \\n\",\n        \"    \\n\",\n        \"\\n\",\n        \"    input_ids_tgt = tokenizer.convert_tokens_to_ids(tokens_tgt)\\n\",\n        \"    \\n\",\n        \"    #Adding begiining and end token\\n\",\n        \"    input_ids_tgt = input_ids_tgt[:-1] \\n\",\n        \"    \\n\",\n        \"    input_mask_src = [1] * len(input_ids_src)\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"    input_mask_tgt = [1] * len(input_ids_tgt)\\n\",\n        \"    \\n\",\n        \"    labels_tgt = input_ids_tgt[1:]\\n\",\n        \"    \\n\",\n        \"    \\n\",\n        \"    labels_tgt.append(0)\\n\",\n        \"    \\n\",\n        \"    #print(len(input_ids_tgt))\\n\",\n        \"    #print(len(input_mask_tgt))\\n\",\n        \"    #print(len(labels_tgt))\\n\",\n        \"    #print(len(segment_ids_tgt))\\n\",\n        \"    \\n\",\n        \"    while len(input_ids_src) < max_seq_length_src:\\n\",\n        \"        input_ids_src.append(0)\\n\",\n        \"        input_mask_src.append(0)\\n\",\n        \"        segment_ids_src.append(0)\\n\",\n        \"\\n\",\n        \"    while len(input_ids_tgt) < max_seq_length_tgt:\\n\",\n        \"        input_ids_tgt.append(0)\\n\",\n        \"        input_mask_tgt.append(0)\\n\",\n        \"        segment_ids_tgt.append(0)\\n\",\n        \"        labels_tgt.append(0)\\n\",\n        \"\\n\",\n        \"    feature = InputFeatures( 
src_input_ids=input_ids_src,src_input_mask=input_mask_src,src_segment_ids=segment_ids_src,\\n\",\n        \"        tgt_input_ids=input_ids_tgt,tgt_input_mask=input_mask_tgt,tgt_labels=labels_tgt)\\n\",\n        \"\\n\",\n        \"    \\n\",\n        \"    return feature\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def file_based_input_fn_builder(input_file, max_seq_length_src,max_seq_length_tgt, is_training,\\n\",\n        \"                                drop_remainder, is_distributed=False):\\n\",\n        \"    \\\"\\\"\\\"Creates an `input_fn` closure to be passed to TPUEstimator.\\\"\\\"\\\"\\n\",\n        \"\\n\",\n        \"    name_to_features = {\\n\",\n        \"        \\\"src_input_ids\\\": tf.FixedLenFeature([max_seq_length_src], tf.int64),\\n\",\n        \"        \\\"src_input_mask\\\": tf.FixedLenFeature([max_seq_length_src], tf.int64),\\n\",\n        \"        \\\"src_segment_ids\\\": tf.FixedLenFeature([max_seq_length_src], tf.int64),\\n\",\n        \"        \\\"tgt_input_ids\\\": tf.FixedLenFeature([max_seq_length_tgt], tf.int64),\\n\",\n        \"        \\\"tgt_input_mask\\\": tf.FixedLenFeature([max_seq_length_tgt], tf.int64),\\n\",\n        \"        \\\"tgt_labels\\\" : tf.FixedLenFeature([max_seq_length_tgt], tf.int64),\\n\",\n        \"        \\n\",\n        \"        \\n\",\n        \"    }\\n\",\n        \"\\n\",\n        \"    def _decode_record(record, name_to_features):\\n\",\n        \"        \\\"\\\"\\\"Decodes a record to a TensorFlow example.\\\"\\\"\\\"\\n\",\n        \"        example = tf.parse_single_example(record, name_to_features)\\n\",\n        \"        print(example)\\n\",\n        \"        print(example.keys())\\n\",\n        \"\\n\",\n        \"        # tf.Example only supports tf.int64, but the TPU only supports tf.int32.\\n\",\n        \"        # So cast all int64 to int32.\\n\",\n        \"        for name in list(example.keys()):\\n\",\n        \"            t = example[name]\\n\",\n        \"      
      if t.dtype == tf.int64:\\n\",\n        \"                t = tf.to_int32(t)\\n\",\n        \"            example[name] = t\\n\",\n        \"\\n\",\n        \"        return example\\n\",\n        \"\\n\",\n        \"    def input_fn(params):\\n\",\n        \"        \\\"\\\"\\\"The actual input function.\\\"\\\"\\\"\\n\",\n        \"        batch_size = params[\\\"batch_size\\\"]\\n\",\n        \"\\n\",\n        \"        # For training, we want a lot of parallel reading and shuffling.\\n\",\n        \"        # For eval, we want no shuffling and parallel reading doesn't matter.\\n\",\n        \"        d = tf.data.TFRecordDataset(input_file)\\n\",\n        \"        if is_training:\\n\",\n        \"\\n\",\n        \"            if is_distributed:\\n\",\n        \"                import horovod.tensorflow as hvd\\n\",\n        \"                tf.logging.info('distributed mode is enabled.'\\n\",\n        \"                                'size:{} rank:{}'.format(hvd.size(), hvd.rank()))\\n\",\n        \"                # https://github.com/uber/horovod/issues/223\\n\",\n        \"                d = d.shard(hvd.size(), hvd.rank())\\n\",\n        \"\\n\",\n        \"                d = d.repeat()\\n\",\n        \"                d = d.shuffle(buffer_size=100)\\n\",\n        \"                d = d.apply(\\n\",\n        \"                    tf.contrib.data.map_and_batch(\\n\",\n        \"                        lambda record: _decode_record(record, name_to_features),\\n\",\n        \"                        batch_size=batch_size//hvd.size(),\\n\",\n        \"                        drop_remainder=drop_remainder))\\n\",\n        \"            else:\\n\",\n        \"                tf.logging.info('distributed mode is not enabled.')\\n\",\n        \"                d = d.repeat()\\n\",\n        \"                d = d.shuffle(buffer_size=100)\\n\",\n        \"                d = d.apply(\\n\",\n        \"                    
tf.contrib.data.map_and_batch(\\n\",\n        \"                        lambda record: _decode_record(record, name_to_features),\\n\",\n        \"                        batch_size=batch_size,\\n\",\n        \"                        drop_remainder=drop_remainder))\\n\",\n        \"\\n\",\n        \"        else:\\n\",\n        \"            d = d.apply(\\n\",\n        \"                tf.contrib.data.map_and_batch(\\n\",\n        \"                    lambda record: _decode_record(record, name_to_features),\\n\",\n        \"                    batch_size=batch_size,\\n\",\n        \"                    drop_remainder=drop_remainder))\\n\",\n        \"\\n\",\n        \"        return d\\n\",\n        \"    return input_fn\\n\",\n        \"  \\n\",\n        \"  \\n\",\n        \"def get_dataset(processor,\\n\",\n        \"                tokenizer,\\n\",\n        \"                data_dir,\\n\",\n        \"                max_seq_length_src,\\n\",\n        \"                max_seq_length_tgt,\\n\",\n        \"                batch_size,\\n\",\n        \"                mode,\\n\",\n        \"                output_dir,\\n\",\n        \"                is_distributed=False):\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"    Args:\\n\",\n        \"        processor: Data Preprocessor, must have get_lables,\\n\",\n        \"            get_train/dev/test/examples methods defined.\\n\",\n        \"        tokenizer: The Sentence Tokenizer. 
Generally should be\\n\",\n        \"            SentencePiece Model.\\n\",\n        \"        data_dir: The input data directory.\\n\",\n        \"        max_seq_length: Max sequence length.\\n\",\n        \"        batch_size: mini-batch size.\\n\",\n        \"        model: `train`, `eval` or `test`.\\n\",\n        \"        output_dir: The directory to save the TFRecords in.\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"    #label_list = processor.get_labels()\\n\",\n        \"    if mode == 'train':\\n\",\n        \"        #train_examples = processor.get_train_examples(data_dir)\\n\",\n        \"        #train_file = os.path.join(output_dir, \\\"train.tf_record\\\")\\n\",\n        \"        train_file = \\\"gs://bert_summ/train.tf_record\\\"\\n\",\n        \"        #file_based_convert_examples_to_features(\\n\",\n        \"        #    train_examples, max_seq_length_src,max_seq_length_tgt,\\n\",\n        \"        #    tokenizer, train_file)\\n\",\n        \"        dataset = file_based_input_fn_builder(\\n\",\n        \"            input_file=train_file,\\n\",\n        \"            max_seq_length_src=max_seq_length_src,\\n\",\n        \"            max_seq_length_tgt =max_seq_length_tgt,\\n\",\n        \"            is_training=True,\\n\",\n        \"            drop_remainder=True,\\n\",\n        \"            is_distributed=is_distributed)({'batch_size': batch_size})\\n\",\n        \"    elif mode == 'eval':\\n\",\n        \"        #eval_examples = processor.get_dev_examples(data_dir)\\n\",\n        \"        #eval_file = os.path.join(output_dir, \\\"eval.tf_record\\\")\\n\",\n        \"        eval_file = \\\"gs://bert_summ/eval.tf_record\\\"\\n\",\n        \"        #file_based_convert_examples_to_features(\\n\",\n        \"        #    eval_examples, max_seq_length_src,max_seq_length_tgt,\\n\",\n        \"        #    tokenizer, eval_file)\\n\",\n        \"        dataset = file_based_input_fn_builder(\\n\",\n        \"            
input_file=eval_file,\\n\",\n        \"            max_seq_length_src=max_seq_length_src,\\n\",\n        \"            max_seq_length_tgt =max_seq_length_tgt,\\n\",\n        \"            is_training=False,\\n\",\n        \"            drop_remainder=True,\\n\",\n        \"            is_distributed=is_distributed)({'batch_size': batch_size})\\n\",\n        \"    elif mode == 'test':\\n\",\n        \"      \\n\",\n        \"        #test_examples = processor.get_test_examples(data_dir)\\n\",\n        \"        #test_file = os.path.join(output_dir, \\\"predict.tf_record\\\")\\n\",\n        \"        test_file = \\\"gs://bert_summ/predict.tf_record\\\"\\n\",\n        \"        \\n\",\n        \"        #file_based_convert_examples_to_features(\\n\",\n        \"        #    test_examples, max_seq_length_src,max_seq_length_tgt,\\n\",\n        \"        #    tokenizer, test_file)\\n\",\n        \"        dataset = file_based_input_fn_builder(\\n\",\n        \"            input_file=test_file,\\n\",\n        \"            max_seq_length_src=max_seq_length_src,\\n\",\n        \"            max_seq_length_tgt =max_seq_length_tgt,\\n\",\n        \"            is_training=False,\\n\",\n        \"            drop_remainder=True,\\n\",\n        \"            is_distributed=is_distributed)({'batch_size': batch_size})\\n\",\n        \"    return dataset\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"Nn4vhTvJjT0D\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"f4080019-27c1-4f01-e498-a2b1d7600beb\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 227\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"bert_config = model_utils.transform_bert_to_texar_config(\\n\",\n        \"            os.path.join(bert_pretrain_dir, 'bert_config.json'))\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"\\n\",\n        
\"tokenizer = tokenization.FullTokenizer(\\n\",\n        \"        vocab_file=os.path.join(bert_pretrain_dir, 'vocab.txt'),\\n\",\n        \"        do_lower_case=True)\\n\",\n        \"\\n\",\n        \"vocab_size = len(tokenizer.vocab)\\n\",\n        \"\\n\",\n        \"processor = CNNDailymail()\\n\",\n        \"train_dataset = get_dataset(processor,tokenizer,\\\"./\\\",max_seq_length_src,max_seq_length_tgt,4,'train',\\\"./\\\")\\n\",\n        \"eval_dataset = get_dataset(processor,tokenizer,\\\"./\\\",max_seq_length_src,max_seq_length_tgt,4,'eval',\\\"./\\\")\\n\",\n        \"test_dataset = get_dataset(processor,tokenizer,\\\"./\\\",max_seq_length_src,max_seq_length_tgt,4,'test',\\\"./\\\")\\n\",\n        \"#del processor\"\n      ],\n      \"execution_count\": 13,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:tensorflow:distributed mode is not enabled.\\n\",\n            \"WARNING:tensorflow:From <ipython-input-12-3918a62c9cd3>:297: map_and_batch (from tensorflow.contrib.data.python.ops.batching) is deprecated and will be removed in a future version.\\n\",\n            \"Instructions for updating:\\n\",\n            \"Use `tf.data.experimental.map_and_batch(...)`.\\n\",\n            \"{'src_input_ids': <tf.Tensor 'ParseSingleExample/ParseSingleExample:0' shape=(512,) dtype=int64>, 'src_input_mask': <tf.Tensor 'ParseSingleExample/ParseSingleExample:1' shape=(512,) dtype=int64>, 'src_segment_ids': <tf.Tensor 'ParseSingleExample/ParseSingleExample:2' shape=(512,) dtype=int64>, 'tgt_input_ids': <tf.Tensor 'ParseSingleExample/ParseSingleExample:3' shape=(400,) dtype=int64>, 'tgt_input_mask': <tf.Tensor 'ParseSingleExample/ParseSingleExample:4' shape=(400,) dtype=int64>, 'tgt_labels': <tf.Tensor 'ParseSingleExample/ParseSingleExample:5' shape=(400,) dtype=int64>}\\n\",\n            \"dict_keys(['src_input_ids', 'src_input_mask', 'src_segment_ids', 'tgt_input_ids', 'tgt_input_mask', 
'tgt_labels'])\\n\",\n            \"{'src_input_ids': <tf.Tensor 'ParseSingleExample/ParseSingleExample:0' shape=(512,) dtype=int64>, 'src_input_mask': <tf.Tensor 'ParseSingleExample/ParseSingleExample:1' shape=(512,) dtype=int64>, 'src_segment_ids': <tf.Tensor 'ParseSingleExample/ParseSingleExample:2' shape=(512,) dtype=int64>, 'tgt_input_ids': <tf.Tensor 'ParseSingleExample/ParseSingleExample:3' shape=(400,) dtype=int64>, 'tgt_input_mask': <tf.Tensor 'ParseSingleExample/ParseSingleExample:4' shape=(400,) dtype=int64>, 'tgt_labels': <tf.Tensor 'ParseSingleExample/ParseSingleExample:5' shape=(400,) dtype=int64>}\\n\",\n            \"dict_keys(['src_input_ids', 'src_input_mask', 'src_segment_ids', 'tgt_input_ids', 'tgt_input_mask', 'tgt_labels'])\\n\",\n            \"{'src_input_ids': <tf.Tensor 'ParseSingleExample/ParseSingleExample:0' shape=(512,) dtype=int64>, 'src_input_mask': <tf.Tensor 'ParseSingleExample/ParseSingleExample:1' shape=(512,) dtype=int64>, 'src_segment_ids': <tf.Tensor 'ParseSingleExample/ParseSingleExample:2' shape=(512,) dtype=int64>, 'tgt_input_ids': <tf.Tensor 'ParseSingleExample/ParseSingleExample:3' shape=(400,) dtype=int64>, 'tgt_input_mask': <tf.Tensor 'ParseSingleExample/ParseSingleExample:4' shape=(400,) dtype=int64>, 'tgt_labels': <tf.Tensor 'ParseSingleExample/ParseSingleExample:5' shape=(400,) dtype=int64>}\\n\",\n            \"dict_keys(['src_input_ids', 'src_input_mask', 'src_segment_ids', 'tgt_input_ids', 'tgt_input_mask', 'tgt_labels'])\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"kDhy0XGlIcY2\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"del processor\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"CBXxrBteAuVj\",\n        \"colab_type\": \"code\",\n        \"colab\": {\n          
\"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        },\n        \"outputId\": \"424fc4a8-d889-412f-ae34-766b961ce7ab\"\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"vocab_size\"\n      ],\n      \"execution_count\": 15,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"30522\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 15\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"lfw2JV11jsad\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"#inputs to the model\\n\",\n        \"src_input_ids = tf.placeholder(tf.int64, shape=(None, None))\\n\",\n        \"src_segment_ids = tf.placeholder(tf.int64, shape=(None, None))\\n\",\n        \"tgt_input_ids = tf.placeholder(tf.int64, shape=(None, None))\\n\",\n        \"tgt_segment_ids = tf.placeholder(tf.int64, shape=(None, None))\\n\",\n        \"\\n\",\n        \"batch_size = tf.shape(src_input_ids)[0]\\n\",\n        \"\\n\",\n        \"src_input_length = tf.reduce_sum(1 - tf.to_int32(tf.equal(src_input_ids, 0)),\\n\",\n        \"                             axis=1)\\n\",\n        \"tgt_input_length = tf.reduce_sum(1 - tf.to_int32(tf.equal(src_input_ids, 0)),\\n\",\n        \"                             axis=1)\\n\",\n        \"\\n\",\n        \"labels = tf.placeholder(tf.int64, shape=(None, None))\\n\",\n        \"is_target = tf.to_float(tf.not_equal(labels, 0))\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"global_step = tf.Variable(0, dtype=tf.int64, trainable=False)\\n\",\n        \"learning_rate = tf.placeholder(tf.float64, shape=(), name='lr')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": 
\"jTFde06_kACm\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"#create the iterator \\n\",\n        \"iterator = tx.data.FeedableDataIterator({\\n\",\n        \"        'train': train_dataset, 'eval': eval_dataset, 'test': test_dataset})\\n\",\n        \"\\n\",\n        \"batch = iterator.get_next()\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"XSrDO5YBkPYh\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"a07623be-20ff-433c-a256-95f33b49b531\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 51\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"#encoder Bert model\\n\",\n        \"print(\\\"Intializing the Bert Encoder Graph\\\")\\n\",\n        \"with tf.variable_scope('bert'):\\n\",\n        \"        embedder = tx.modules.WordEmbedder(\\n\",\n        \"            vocab_size=bert_config.vocab_size,\\n\",\n        \"            hparams=bert_config.embed)\\n\",\n        \"        word_embeds = embedder(src_input_ids)\\n\",\n        \"\\n\",\n        \"        # Creates segment embeddings for each type of tokens.\\n\",\n        \"        segment_embedder = tx.modules.WordEmbedder(\\n\",\n        \"            vocab_size=bert_config.type_vocab_size,\\n\",\n        \"            hparams=bert_config.segment_embed)\\n\",\n        \"        segment_embeds = segment_embedder(src_segment_ids)\\n\",\n        \"\\n\",\n        \"        input_embeds = word_embeds + segment_embeds\\n\",\n        \"\\n\",\n        \"        # The BERT model (a TransformerEncoder)\\n\",\n        \"        encoder = tx.modules.TransformerEncoder(hparams=bert_config.encoder)\\n\",\n        \"        encoder_output = encoder(input_embeds, src_input_length)\\n\",\n        \"        \\n\",\n        \"        # Builds layers for downstream 
classification, which is also initialized\\n\",\n        \"        # with BERT pre-trained checkpoint.\\n\",\n        \"        with tf.variable_scope(\\\"pooler\\\"):\\n\",\n        \"            # Uses the projection of the 1st-step hidden vector of BERT output\\n\",\n        \"            # as the representation of the sentence\\n\",\n        \"            bert_sent_hidden = tf.squeeze(encoder_output[:, 0:1, :], axis=1)\\n\",\n        \"            bert_sent_output = tf.layers.dense(\\n\",\n        \"                bert_sent_hidden, config_downstream.hidden_dim,\\n\",\n        \"                activation=tf.tanh)\\n\",\n        \"            output = tf.layers.dropout(\\n\",\n        \"                bert_sent_output, rate=0.1, training=tx.global_mode_train())\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"print(\\\"loading the bert pretrained weights\\\")\\n\",\n        \"# Loads pretrained BERT model parameters\\n\",\n        \"init_checkpoint = os.path.join(bert_pretrain_dir, 'bert_model.ckpt')\\n\",\n        \"#init_checkpoint = \\\"gs://cloud-tpu-checkpoints/bert/uncased_L-12_H-768_A-12/bert_model.ckpt\\\"\\n\",\n        \"model_utils.init_bert_checkpoint(init_checkpoint)\"\n      ],\n      \"execution_count\": 18,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Intializing the Bert Encoder Graph\\n\",\n            \"loading the bert pretrained weights\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"C5m48bu5kVXm\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"#decoder part and mle losss\\n\",\n        \"tgt_embedding = tf.concat(\\n\",\n        \"    [tf.zeros(shape=[1, embedder.dim]), embedder.embedding[1:, :]], axis=0)\\n\",\n        \"\\n\",\n        \"decoder = 
tx.modules.TransformerDecoder(embedding=tgt_embedding,\\n\",\n        \"                             hparams=dcoder_config)\\n\",\n        \"# For training\\n\",\n        \"outputs = decoder(\\n\",\n        \"    memory=encoder_output,\\n\",\n        \"    memory_sequence_length=src_input_length,\\n\",\n        \"    inputs=embedder(tgt_input_ids),\\n\",\n        \"    sequence_length=tgt_input_length,\\n\",\n        \"    decoding_strategy='train_greedy',\\n\",\n        \"    mode=tf.estimator.ModeKeys.TRAIN\\n\",\n        \")\\n\",\n        \"\\n\",\n        \"mle_loss = transformer_utils.smoothing_cross_entropy(\\n\",\n        \"        outputs.logits, labels, vocab_size, loss_label_confidence)\\n\",\n        \"mle_loss = tf.reduce_sum(mle_loss * is_target) / tf.reduce_sum(is_target)\\n\",\n        \"\\n\",\n        \"train_op = tx.core.get_train_op(\\n\",\n        \"        mle_loss,\\n\",\n        \"        learning_rate=learning_rate,\\n\",\n        \"        global_step=global_step,\\n\",\n        \"        hparams=opt)\\n\",\n        \"\\n\",\n        \"tf.summary.scalar('lr', learning_rate)\\n\",\n        \"tf.summary.scalar('mle_loss', mle_loss)\\n\",\n        \"summary_merged = tf.summary.merge_all()\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"sfDuR-SVkdhF\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"#prediction \\n\",\n        \"start_tokens = tf.fill([tx.utils.get_batch_size(src_input_ids)],\\n\",\n        \"                       bos_token_id)\\n\",\n        \"predictions = decoder(\\n\",\n        \"    memory=encoder_output,\\n\",\n        \"    memory_sequence_length=src_input_length,\\n\",\n        \"    decoding_strategy='infer_greedy',\\n\",\n        \"    beam_width=beam_width,\\n\",\n        \"    alpha=alpha,\\n\",\n        \"    start_tokens=start_tokens,\\n\",\n        \"    
end_token=eos_token_id,\\n\",\n        \"    max_decoding_length=400,\\n\",\n        \"    mode=tf.estimator.ModeKeys.PREDICT\\n\",\n        \")\\n\",\n        \"if beam_width <= 1:\\n\",\n        \"    inferred_ids = predictions[0].sample_id\\n\",\n        \"else:\\n\",\n        \"    # Uses the best sample by beam search\\n\",\n        \"    inferred_ids = predictions['sample_id'][:, :, 0]\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"saver = tf.train.Saver(max_to_keep=5)\\n\",\n        \"best_results = {'score': 0, 'epoch': -1}\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"TCmgMIV6kzO4\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"def _train_epoch(sess, epoch, step, smry_writer):\\n\",\n        \"        \\n\",\n        \"            \\n\",\n        \"        fetches = {\\n\",\n        \"            'step': global_step,\\n\",\n        \"            'train_op': train_op,\\n\",\n        \"            'smry': summary_merged,\\n\",\n        \"            'loss': mle_loss,\\n\",\n        \"        }\\n\",\n        \"\\n\",\n        \"        while True:\\n\",\n        \"            try:\\n\",\n        \"              feed_dict = {\\n\",\n        \"                iterator.handle: iterator.get_handle(sess, 'train'),\\n\",\n        \"                tx.global_mode(): tf.estimator.ModeKeys.TRAIN,\\n\",\n        \"              }\\n\",\n        \"              op = sess.run([batch],feed_dict)\\n\",\n        \"              feed_dict = {\\n\",\n        \"                   src_input_ids:op[0]['src_input_ids'],\\n\",\n        \"                   src_segment_ids : op[0]['src_segment_ids'],\\n\",\n        \"                   tgt_input_ids:op[0]['tgt_input_ids'],\\n\",\n        \"\\n\",\n        \"                   labels:op[0]['tgt_labels'],\\n\",\n        \"                   learning_rate: 
utils.get_lr(step, lr),\\n\",\n        \"                   tx.global_mode(): tf.estimator.ModeKeys.TRAIN\\n\",\n        \"                }\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"              fetches_ = sess.run(fetches, feed_dict=feed_dict)\\n\",\n        \"              step, loss = fetches_['step'], fetches_['loss']\\n\",\n        \"              if step and step % display_steps == 0:\\n\",\n        \"                  logger.info('step: %d, loss: %.4f', step, loss)\\n\",\n        \"                  print('step: %d, loss: %.4f' % (step, loss))\\n\",\n        \"                  smry_writer.add_summary(fetches_['smry'], global_step=step)\\n\",\n        \"\\n\",\n        \"              if step and step % 1000 == 0:\\n\",\n        \"                  model_path = \\\"gs://bert_summ/models/model_\\\"+str(step)+\\\".ckpt\\\"\\n\",\n        \"                  logger.info('saving model to %s', model_path)\\n\",\n        \"                  print('saving model to %s' % model_path)\\n\",\n        \"                  saver.save(sess, model_path)\\n\",\n        \"              if step and step % eval_steps == 0:\\n\",\n        \"                  _eval_epoch(sess, epoch, mode='eval')\\n\",\n        \"            except tf.errors.OutOfRangeError:\\n\",\n        \"                break\\n\",\n        \"\\n\",\n        \"        return step\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"60_hbYdak5rd\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"def _eval_epoch(sess, epoch, mode):\\n\",\n        \"\\n\",\n        \"        references, hypotheses = [], []\\n\",\n        \"        bsize = test_batch_size\\n\",\n        \"        fetches = {\\n\",\n        \"                'inferred_ids': inferred_ids,\\n\",\n        \"            }\\n\",\n        \"        bno=0\\n\",\n        \"        while 
True:\\n\",\n        \"            \\n\",\n        \"            #print(\\\"Temp\\\",temp)\\n\",\n        \"            try:\\n\",\n        \"              print(\\\"Batch\\\",bno)\\n\",\n        \"              feed_dict = {\\n\",\n        \"              iterator.handle: iterator.get_handle(sess, 'eval'),\\n\",\n        \"              tx.global_mode(): tf.estimator.ModeKeys.EVAL,\\n\",\n        \"              }\\n\",\n        \"              op = sess.run([batch],feed_dict)\\n\",\n        \"              feed_dict = {\\n\",\n        \"                   src_input_ids:op[0]['src_input_ids'],\\n\",\n        \"                   src_segment_ids : op[0]['src_segment_ids'],\\n\",\n        \"                   tx.global_mode(): tf.estimator.ModeKeys.EVAL\\n\",\n        \"              }\\n\",\n        \"              fetches_ = sess.run(fetches, feed_dict=feed_dict)\\n\",\n        \"              labels = op[0]['tgt_labels']\\n\",\n        \"              hypotheses.extend(h.tolist() for h in fetches_['inferred_ids'])\\n\",\n        \"              references.extend(r.tolist() for r in labels)\\n\",\n        \"              hypotheses = utils.list_strip_eos(hypotheses, eos_token_id)\\n\",\n        \"              references = utils.list_strip_eos(references, eos_token_id)\\n\",\n        \"              bno = bno+1\\n\",\n        \"              \\n\",\n        \"            except tf.errors.OutOfRangeError:\\n\",\n        \"                break\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"        if mode == 'eval':\\n\",\n        \"            # Writes results to files to evaluate BLEU\\n\",\n        \"            # For 'eval' mode, the BLEU is based on token ids (rather than\\n\",\n        \"            # text tokens) and serves only as a surrogate metric to monitor\\n\",\n        \"            # the training process\\n\",\n        \"            #fname = os.path.join(model_dir, 'tmp.eval')\\n\",\n        \"            fname = \\\"./tmp.eval\\\"\\n\",\n      
  \"            #fname = \\\"gs://bert_summ/models/tmp.eval\\\"\\n\",\n        \"            hypotheses = tx.utils.str_join(hypotheses)\\n\",\n        \"            references = tx.utils.str_join(references)\\n\",\n        \"            hyp_fn, ref_fn = tx.utils.write_paired_text(\\n\",\n        \"                hypotheses, references, fname, mode='s')\\n\",\n        \"            eval_bleu = bleu_wrapper(ref_fn, hyp_fn, case_sensitive=True)\\n\",\n        \"            eval_bleu = 100. * eval_bleu\\n\",\n        \"            logger.info('epoch: %d, eval_bleu %.4f', epoch, eval_bleu)\\n\",\n        \"            print('epoch: %d, eval_bleu %.4f' % (epoch, eval_bleu))\\n\",\n        \"\\n\",\n        \"            if eval_bleu > best_results['score']:\\n\",\n        \"                logger.info('epoch: %d, best bleu: %.4f', epoch, eval_bleu)\\n\",\n        \"                best_results['score'] = eval_bleu\\n\",\n        \"                best_results['epoch'] = epoch\\n\",\n        \"                #model_path = os.path.join(model_dir, 'best-model.ckpt')\\n\",\n        \"                model_path = \\\"gs://bert_summ/models/best-model.ckpt\\\"\\n\",\n        \"                logger.info('saving model to %s', model_path)\\n\",\n        \"                print('saving model to %s' % model_path)\\n\",\n        \"                saver.save(sess, model_path)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"metadata\": {\n        \"id\": \"v9b6ghCsnT90\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"bc4a0e8b-e9ad-408c-a92f-c6b4455b2d03\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 357\n        }\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"#tx.utils.maybe_create_dir(model_dir)\\n\",\n        \"#logging_file = os.path.join(model_dir, 'logging.txt')\\n\",\n        \"\\n\",\n        \"model_dir = 
\\\"gs://bert_summ/models/\\\"\\n\",\n        \"logging_file= \\\"logging.txt\\\"\\n\",\n        \"logger = utils.get_logger(logging_file)\\n\",\n        \"with tf.Session() as sess:\\n\",\n        \"    sess.run(tf.global_variables_initializer())\\n\",\n        \"    sess.run(tf.local_variables_initializer())\\n\",\n        \"    sess.run(tf.tables_initializer())\\n\",\n        \"\\n\",\n        \"    smry_writer = tf.summary.FileWriter(model_dir, graph=sess.graph)\\n\",\n        \"\\n\",\n        \"    if run_mode == 'train_and_evaluate':\\n\",\n        \"        logger.info('Begin running with train_and_evaluate mode')\\n\",\n        \"\\n\",\n        \"        if tf.train.latest_checkpoint(model_dir) is not None:\\n\",\n        \"            logger.info('Restore latest checkpoint in %s' % model_dir)\\n\",\n        \"            saver.restore(sess, tf.train.latest_checkpoint(model_dir))\\n\",\n        \"        \\n\",\n        \"        iterator.initialize_dataset(sess)\\n\",\n        \"\\n\",\n        \"        step = 5000\\n\",\n        \"        for epoch in range(max_train_epoch):\\n\",\n        \"          iterator.restart_dataset(sess, 'train')\\n\",\n        \"          step = _train_epoch(sess, epoch, step, smry_writer)\\n\",\n        \"\\n\",\n        \"    elif run_mode == 'test':\\n\",\n        \"        logger.info('Begin running with test mode')\\n\",\n        \"\\n\",\n        \"        logger.info('Restore latest checkpoint in %s' % model_dir)\\n\",\n        \"        saver.restore(sess, tf.train.latest_checkpoint(model_dir))\\n\",\n        \"\\n\",\n        \"        _eval_epoch(sess, 0, mode='test')\\n\",\n        \"\\n\",\n        \"    else:\\n\",\n        \"        raise ValueError('Unknown mode: {}'.format(run_mode))\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:tensorflow:Restoring parameters from 
gs://bert_summ/models/model_5000.ckpt\\n\",\n            \"step: 5100, loss: 7.0686\\n\",\n            \"step: 5200, loss: 6.7414\\n\",\n            \"step: 5300, loss: 6.4176\\n\",\n            \"step: 5400, loss: 6.9609\\n\",\n            \"step: 5500, loss: 7.0777\\n\",\n            \"step: 5600, loss: 6.8462\\n\",\n            \"step: 5700, loss: 6.8764\\n\",\n            \"step: 5800, loss: 7.2216\\n\",\n            \"step: 5900, loss: 6.6034\\n\",\n            \"step: 6000, loss: 6.8505\\n\",\n            \"saving model to gs://bert_summ/models/model_6000.ckpt\\n\",\n            \"step: 6100, loss: 6.7107\\n\",\n            \"step: 6200, loss: 6.9797\\n\",\n            \"step: 6300, loss: 7.2937\\n\",\n            \"step: 6400, loss: 6.9824\\n\",\n            \"step: 6500, loss: 7.0897\\n\",\n            \"step: 6600, loss: 5.2173\\n\",\n            \"step: 6700, loss: 7.3187\\n\",\n            \"step: 6800, loss: 7.0490\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"metadata\": {\n        \"id\": \"L35eRRNKSoOV\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    }\n  ]\n}"
  },
  {
    "path": "Inference.py",
    "content": "from flask import Flask,request,render_template\nimport requests \nimport json\nfrom collections import OrderedDict\nimport os\nimport numpy as np\nimport tensorflow as tf\n\napp =Flask(__name__)\n\nimport sys\n\nif not 'texar_repo' in sys.path:\n  sys.path += ['texar_repo']\n\nfrom config import *\nfrom model import *\nfrom preprocess import *\n\n\nstart_tokens = tf.fill([tx.utils.get_batch_size(src_input_ids)],\n                       bos_token_id)\npredictions = decoder(\n    memory=encoder_output,\n    memory_sequence_length=src_input_length,\n    decoding_strategy='infer_greedy',\n    beam_width=beam_width,\n    alpha=alpha,\n    start_tokens=start_tokens,\n    end_token=eos_token_id,\n    max_decoding_length=400,\n    mode=tf.estimator.ModeKeys.PREDICT\n)\nif beam_width <= 1:\n    inferred_ids = predictions[0].sample_id\nelse:\n    # Uses the best sample by beam search\n    inferred_ids = predictions['sample_id'][:, :, 0]\n\n\n\n\ntokenizer = tokenization.FullTokenizer(\n      vocab_file=os.path.join(bert_pretrain_dir, 'vocab.txt'),\n      do_lower_case=True)\n\n\nsess = tf.Session()\ndef infer_single_example(story,actual_summary,tokenizer):\n      example = {\"src_txt\":story,\n      \"tgt_txt\":actual_summary\n      }\n      features = convert_single_example(1,example,max_seq_length_src,max_seq_length_tgt,\n         tokenizer)\n      feed_dict = {\n      src_input_ids:np.array(features.src_input_ids).reshape(-1,1),\n      src_segment_ids : np.array(features.src_segment_ids).reshape(-1,1)\n\n      }\n\n      references, hypotheses = [], []\n      fetches = {\n      'inferred_ids': inferred_ids,\n      }\n      fetches_ = sess.run(fetches, feed_dict=feed_dict)\n      labels = np.array(features.tgt_labels).reshape(-1,1)\n      hypotheses.extend(h.tolist() for h in fetches_['inferred_ids'])\n      references.extend(r.tolist() for r in labels)\n      hypotheses = utils.list_strip_eos(hypotheses, eos_token_id)\n      references = 
utils.list_strip_eos(references[0], eos_token_id)\n      hwords = tokenizer.convert_ids_to_tokens(hypotheses[0])\n      rwords = tokenizer.convert_ids_to_tokens(references[0])\n\n      hwords = tx.utils.str_join(hwords).replace(\" ##\",\"\")\n      rwords = tx.utils.str_join(rwords).replace(\" ##\",\"\")\n      print(\"Original\",rwords)\n      print(\"Generated\",hwords)\n      return hwords\n\n@app.route(\"/results\",methods=[\"GET\",\"POST\"])\ndef results():\n\tstory = request.form['story']\n\tsummary = request.form['summary']\n\thwords = infer_single_example(story,summary,tokenizer)\n\treturn hwords\n\n\nif __name__==\"__main__\":\n    sess.run(tf.global_variables_initializer())\n    sess.run(tf.local_variables_initializer())\n    sess.run(tf.tables_initializer())\n    saver.restore(sess, tf.train.latest_checkpoint(model_dir))\n    app.run(host=\"0.0.0.0\",port=1118,debug=False)\n    \n\n\n"
  },
  {
    "path": "Readme.md",
    "content": "<h3>Abstractive summarization using bert as encoder and transformer decoder</h3>\n\nI have used a text generation library called Texar , Its a beautiful library with a lot of abstractions, i would say it to be \nscikit learn for text generation problems.\n\nThe main idea behind this architecture is to use the transfer learning from pretrained BERT a masked language model ,\nI have replaced the Encoder part with BERT Encoder and the deocder is trained from the scratch.\n\nOne of the advantages of using Transfomer Networks is training is much faster than LSTM based models as we elimanate sequential behaviour in Transformer models.\n\nTransformer based models generate more gramatically correct  and coherent sentences.\n\n\n<h3>To run the model</h3>\n<pre>\nwget https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip \nunzip uncased_L-12_H-768_A-12.zip\n\nPlace the story and summary files under data folder with the following names.\n-train_story.txt\n-train_summ.txt\n-eval_story.txt\n-eval_summ.txt\neach story and summary must be in a single line (see sample text given.)\n\n\nStep1:\nRun Preprocessing\n<b>python preprocess.py</b>\n\nThis creates two tfrecord files under the data folder.\n\nStep 2:\n<b>python main.py</b>\n\nConfigurations for the model can be changes from config.py file\n\nStep 3:\nInference \nRun the command <b>python inference.py</b>\nThis code runs a flask server \nUse postman to send the POST request @http://your_ip_address:1118/results\nwith two form parameters story,summary\n\n\n\n</pre>\n\n\n"
  },
  {
    "path": "bnb_4bit_training.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"provenance\": [],\n      \"gpuType\": \"T4\",\n      \"include_colab_link\": true\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    },\n    \"language_info\": {\n      \"name\": \"python\"\n    },\n    \"accelerator\": \"GPU\",\n    \"gpuClass\": \"standard\",\n    \"widgets\": {\n      \"application/vnd.jupyter.widget-state+json\": {\n        \"4dda8029a1c54f9dac38834fc49d12b7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_e0e252da64054d6eb2f661b985aceab8\",\n              \"IPY_MODEL_43a0c376ab8d40619c84e50c1fae1bf3\",\n              \"IPY_MODEL_e7dc091b7bd54c439aaea379a15bdb6a\"\n            ],\n            \"layout\": \"IPY_MODEL_2fff8a5907fa44248e3b57dc4051236c\"\n          }\n        },\n        \"e0e252da64054d6eb2f661b985aceab8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n           
 \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_73f8063f8b4c4c809ce4e410ccafee96\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_11566fa4205343cb9378f078b5b18f98\",\n            \"value\": \"Downloading (…)okenizer_config.json: 100%\"\n          }\n        },\n        \"43a0c376ab8d40619c84e50c1fae1bf3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_18f17e571bb3482a9052b1b268abacde\",\n            \"max\": 156,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_96aa7b38a32a4f4db15ba6841c750fd8\",\n            \"value\": 156\n          }\n        },\n        \"e7dc091b7bd54c439aaea379a15bdb6a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": 
\"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_9bca368a376542d2b60594c47050470d\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_4480a062b90d4d13b12c3ecc0b832d0e\",\n            \"value\": \" 156/156 [00:00&lt;00:00, 10.6kB/s]\"\n          }\n        },\n        \"2fff8a5907fa44248e3b57dc4051236c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": 
null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"73f8063f8b4c4c809ce4e410ccafee96\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n  
          \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"11566fa4205343cb9378f078b5b18f98\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"18f17e571bb3482a9052b1b268abacde\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n       
     \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"96aa7b38a32a4f4db15ba6841c750fd8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"9bca368a376542d2b60594c47050470d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            
\"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"4480a062b90d4d13b12c3ecc0b832d0e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n          
  \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"3daf35d9c166402d94afcfd111b63807\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_44400a34234341f7a182d99ef8657dc8\",\n              \"IPY_MODEL_a7f997badd8d47729dac8cd0aed205dc\",\n              \"IPY_MODEL_57166e4e5c024e1cacc63f2bbed51560\"\n            ],\n            \"layout\": \"IPY_MODEL_0d8925b6952e4c6583b262124f28febc\"\n          }\n        },\n        \"44400a34234341f7a182d99ef8657dc8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_26d72ee7eff34d74986f596cf9c6a557\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_5f9382acf893491ab582a7282edff80d\",\n        
    \"value\": \"Downloading (…)olve/main/vocab.json: 100%\"\n          }\n        },\n        \"a7f997badd8d47729dac8cd0aed205dc\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_f7aff94ef61047cbaa55eeb098d205b5\",\n            \"max\": 1077392,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_45b8e4b9a2af4a72b08f0e2a3c78c63c\",\n            \"value\": 1077392\n          }\n        },\n        \"57166e4e5c024e1cacc63f2bbed51560\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_2385294782954692a455fb65e9b59733\",\n            \"placeholder\": \"​\",\n            \"style\": 
\"IPY_MODEL_1371bd3069e541d3b5e40813bac2c490\",\n            \"value\": \" 1.08M/1.08M [00:00&lt;00:00, 5.60MB/s]\"\n          }\n        },\n        \"0d8925b6952e4c6583b262124f28febc\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": 
null,\n            \"width\": null\n          }\n        },\n        \"26d72ee7eff34d74986f596cf9c6a557\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        
\"5f9382acf893491ab582a7282edff80d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"f7aff94ef61047cbaa55eeb098d205b5\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            
\"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"45b8e4b9a2af4a72b08f0e2a3c78c63c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"2385294782954692a455fb65e9b59733\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n 
           \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"1371bd3069e541d3b5e40813bac2c490\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"083f6c3c60ad4370b6587761b49e5654\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          
\"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_8366012339cf4c1c8ef44ba3341df1b3\",\n              \"IPY_MODEL_139901b773f141b196281de1c23f40df\",\n              \"IPY_MODEL_7e0ae4a2ebe446b683e0f8be4a70dfd5\"\n            ],\n            \"layout\": \"IPY_MODEL_cc64b611043840ea9a6c1421d7327bb0\"\n          }\n        },\n        \"8366012339cf4c1c8ef44ba3341df1b3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_d687111284984c0da14ff9f534b53c96\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_d7ae79cdb87146729acac1b5f2f70263\",\n            \"value\": \"Downloading (…)olve/main/merges.txt: 100%\"\n          }\n        },\n        \"139901b773f141b196281de1c23f40df\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          
\"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_fe2e1e6d423c46c19ca6abc4bc397860\",\n            \"max\": 456583,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_2977c91d68ec48b09605472e2b46c62c\",\n            \"value\": 456583\n          }\n        },\n        \"7e0ae4a2ebe446b683e0f8be4a70dfd5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_3642f2a6cbd541408e3a88745f597f38\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_756359b628f74b4ebac5392e09a03e83\",\n            \"value\": \" 457k/457k [00:00&lt;00:00, 7.25MB/s]\"\n          }\n        },\n        \"cc64b611043840ea9a6c1421d7327bb0\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n       
   \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"d687111284984c0da14ff9f534b53c96\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n        
    \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"d7ae79cdb87146729acac1b5f2f70263\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            
\"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"fe2e1e6d423c46c19ca6abc4bc397860\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": 
null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"2977c91d68ec48b09605472e2b46c62c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"3642f2a6cbd541408e3a88745f597f38\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n  
          \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"756359b628f74b4ebac5392e09a03e83\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"af26a845a28e47a98a42c2344b20430d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": 
\"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_559a87b3917a47738cd4d0172dc276cd\",\n              \"IPY_MODEL_c41ee826c58a4542a435f41a8cf2ed1c\",\n              \"IPY_MODEL_3e70a3c512a04c25a90217466f0b904f\"\n            ],\n            \"layout\": \"IPY_MODEL_b2e03d1e18a546a3bf7edc942a9ce2ee\"\n          }\n        },\n        \"559a87b3917a47738cd4d0172dc276cd\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_29fa4f908d07492387dc9f7607f55312\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_76dea45f667b40278e34647e6b6dbeb1\",\n            \"value\": \"Downloading (…)/main/tokenizer.json: 100%\"\n          }\n        },\n        \"c41ee826c58a4542a435f41a8cf2ed1c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            
\"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_56649cdda2034e0189aec800c6d8f4af\",\n            \"max\": 2113710,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_4fe01ea1a4c04d2fa9851b9e515aa79a\",\n            \"value\": 2113710\n          }\n        },\n        \"3e70a3c512a04c25a90217466f0b904f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_e1851b9cf0124fa3b7d876c40244c61c\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_9560bb9113724753a768d58ca0367046\",\n            \"value\": \" 2.11M/2.11M [00:00&lt;00:00, 10.4MB/s]\"\n          }\n        },\n        \"b2e03d1e18a546a3bf7edc942a9ce2ee\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n     
       \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"29fa4f908d07492387dc9f7607f55312\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n      
      \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"76dea45f667b40278e34647e6b6dbeb1\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        
\"56649cdda2034e0189aec800c6d8f4af\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"4fe01ea1a4c04d2fa9851b9e515aa79a\": {\n          \"model_module\": 
\"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"e1851b9cf0124fa3b7d876c40244c61c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": 
null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"9560bb9113724753a768d58ca0367046\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"46403b1a813e4e9d96718ec9c85a5065\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_de120e4ae5d7455390945a2df5791743\",\n              \"IPY_MODEL_92f04cfa9cd04e34aa7ef73499f0c62b\",\n           
   \"IPY_MODEL_cb4576abb0e4409aa4125a1eb3612cd7\"\n            ],\n            \"layout\": \"IPY_MODEL_8ffb76c7b6ba46698dbf1c7dedfce105\"\n          }\n        },\n        \"de120e4ae5d7455390945a2df5791743\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_974775d2cc954c9a83942ce57e263a95\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_7ebe4b354fc04b099ff8064700d034c4\",\n            \"value\": \"Downloading (…)cial_tokens_map.json: 100%\"\n          }\n        },\n        \"92f04cfa9cd04e34aa7ef73499f0c62b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_171c629395f14252a0c9f0d52ddde7f6\",\n            \"max\": 90,\n            \"min\": 
0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_6ffb160a906143fca91bd8a62736b966\",\n            \"value\": 90\n          }\n        },\n        \"cb4576abb0e4409aa4125a1eb3612cd7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_20c4ed70df98425b878c8bc0aeb935b9\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_cf87f95a7c514168a260c0302dd05dc9\",\n            \"value\": \" 90.0/90.0 [00:00&lt;00:00, 1.96kB/s]\"\n          }\n        },\n        \"8ffb76c7b6ba46698dbf1c7dedfce105\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": 
null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"974775d2cc954c9a83942ce57e263a95\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": 
null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"7ebe4b354fc04b099ff8064700d034c4\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"171c629395f14252a0c9f0d52ddde7f6\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": 
\"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"6ffb160a906143fca91bd8a62736b966\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n        
    \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"20c4ed70df98425b878c8bc0aeb935b9\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            
\"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"cf87f95a7c514168a260c0302dd05dc9\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"3bcad19a79064a7dbbabaabb0f1c8a9f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_a4f10c97b95b4b19b2a9a02fbc60fa29\",\n              \"IPY_MODEL_53fc8538b34c4c34809bbc7c75bfc029\",\n              \"IPY_MODEL_f75760a7141245a0a8881e3d7476c877\"\n            ],\n            \"layout\": \"IPY_MODEL_87855b48d3aa41e7b38545aba53d71be\"\n          }\n        },\n        \"a4f10c97b95b4b19b2a9a02fbc60fa29\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n  
        \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_7ea2647caa7d4b14989e6a4f795b409b\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_a173bccd53974a248adb99dc1bd5b4d2\",\n            \"value\": \"Downloading (…)lve/main/config.json: 100%\"\n          }\n        },\n        \"53fc8538b34c4c34809bbc7c75bfc029\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_3f9845343c7f4ffea9f6640c7f0ec9ab\",\n            \"max\": 613,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_b3a0d237f6fe4b6dae958fef4765786d\",\n            \"value\": 613\n          }\n        },\n        \"f75760a7141245a0a8881e3d7476c877\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": 
\"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_428c0f8165604415af070ee916ffa02a\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_b427fd7a2fdc49388e34c1e390c13260\",\n            \"value\": \" 613/613 [00:00&lt;00:00, 19.2kB/s]\"\n          }\n        },\n        \"87855b48d3aa41e7b38545aba53d71be\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n      
      \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"7ea2647caa7d4b14989e6a4f795b409b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            
\"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"a173bccd53974a248adb99dc1bd5b4d2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"3f9845343c7f4ffea9f6640c7f0ec9ab\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            
\"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"b3a0d237f6fe4b6dae958fef4765786d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"428c0f8165604415af070ee916ffa02a\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          
\"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"b427fd7a2fdc49388e34c1e390c13260\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": 
\"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"3527915a5473497b81ccbafafc6f7345\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_f5c40d1c5de943d4bce6939895d0ebf5\",\n              \"IPY_MODEL_72e94f7605c8452081f13051fe4eaba1\",\n              \"IPY_MODEL_0c2a80f722f54658b6d25079377b68ff\"\n            ],\n            \"layout\": \"IPY_MODEL_85165564577b4e3ea86d8e4113425133\"\n          }\n        },\n        \"f5c40d1c5de943d4bce6939895d0ebf5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n  
          \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_07a067b7fe0049088cd2049382701467\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_9e6e8895372e450e9a9c926bd9d66bfc\",\n            \"value\": \"Downloading (…)model.bin.index.json: 100%\"\n          }\n        },\n        \"72e94f7605c8452081f13051fe4eaba1\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_3dc35e22727744768f2c113f9a5358a9\",\n            \"max\": 57712,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_e97ff06782a24e8897c0f6b4acdc3579\",\n            \"value\": 57712\n          }\n        },\n        \"0c2a80f722f54658b6d25079377b68ff\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            
\"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_cc5082778cd743b7b494485050473841\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_97e06f7effa149d8afe85c153ede1d29\",\n            \"value\": \" 57.7k/57.7k [00:00&lt;00:00, 675kB/s]\"\n          }\n        },\n        \"85165564577b4e3ea86d8e4113425133\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n 
           \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"07a067b7fe0049088cd2049382701467\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            
\"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"9e6e8895372e450e9a9c926bd9d66bfc\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"3dc35e22727744768f2c113f9a5358a9\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            
\"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"e97ff06782a24e8897c0f6b4acdc3579\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"cc5082778cd743b7b494485050473841\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": 
\"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"97e06f7effa149d8afe85c153ede1d29\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"34283ecf8ccd4cf69c7292b120e2cb43\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_685bfd23604147978b3bb8ea1d758107\",\n              \"IPY_MODEL_9646be0b72a44f4ba6cfc68f2a522028\",\n              \"IPY_MODEL_2e523299ecbb4ef3adaa95f8dd2ab072\"\n            ],\n            \"layout\": \"IPY_MODEL_b1d721eb56d243bb865e90b61c4a9785\"\n          }\n        },\n        \"685bfd23604147978b3bb8ea1d758107\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_df1a778a5d254fd997e6aa992fa3d537\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_28e540c117fb4766a344a5a1a887115e\",\n          
  \"value\": \"Downloading shards: 100%\"\n          }\n        },\n        \"9646be0b72a44f4ba6cfc68f2a522028\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_1cd4abd9506d4f329d7dcf58e7008ba7\",\n            \"max\": 46,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_6c5af425aaf44adea4e2027ba6062b9c\",\n            \"value\": 46\n          }\n        },\n        \"2e523299ecbb4ef3adaa95f8dd2ab072\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_b9a91217395a4550a6012a06efd29e7c\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_f9c98867724e4dfda3b8918803c5bb1e\",\n            
\"value\": \" 46/46 [04:27&lt;00:00,  5.07s/it]\"\n          }\n        },\n        \"b1d721eb56d243bb865e90b61c4a9785\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        
\"df1a778a5d254fd997e6aa992fa3d537\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"28e540c117fb4766a344a5a1a887115e\": {\n          \"model_module\": 
\"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"1cd4abd9506d4f329d7dcf58e7008ba7\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            
\"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"6c5af425aaf44adea4e2027ba6062b9c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"b9a91217395a4550a6012a06efd29e7c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": 
null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"f9c98867724e4dfda3b8918803c5bb1e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"2c3a591792794cd98a2c7317c6f8dc7b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          
\"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_143568f550f34b1182a411497051308d\",\n              \"IPY_MODEL_8e69d62d0dc14ce4b7b0269bf4e2eb78\",\n              \"IPY_MODEL_5b22f2c813454e4787301c33dda5692f\"\n            ],\n            \"layout\": \"IPY_MODEL_02707d76344647ee9a5eb0e0205e2ff5\"\n          }\n        },\n        \"143568f550f34b1182a411497051308d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_3f1781d7d05f4affbb0cf69a2d83cb76\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_5cc81942930d4d5fb039adaf004227c0\",\n            \"value\": \"Downloading (…)l-00001-of-00046.bin: 100%\"\n          }\n        },\n        \"8e69d62d0dc14ce4b7b0269bf4e2eb78\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_0c59932a3cdd4931a337d18558ac93f0\",\n            \"max\": 925994625,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_3a2bcd6c344b43b7bedddd4b56e833c8\",\n            \"value\": 925994625\n          }\n        },\n        \"5b22f2c813454e4787301c33dda5692f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_16a6b4938772480d9813c36caf450514\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_b757c51a4db04c3aaaf2f229d25ce2b6\",\n            \"value\": \" 926M/926M [00:03&lt;00:00, 230MB/s]\"\n          }\n        },\n        \"02707d76344647ee9a5eb0e0205e2ff5\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": 
\"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"3f1781d7d05f4affbb0cf69a2d83cb76\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n     
       \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"5cc81942930d4d5fb039adaf004227c0\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            
\"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"0c59932a3cdd4931a337d18558ac93f0\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            
\"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"3a2bcd6c344b43b7bedddd4b56e833c8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"16a6b4938772480d9813c36caf450514\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            
\"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"b757c51a4db04c3aaaf2f229d25ce2b6\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"56ec1bada613446ca99bf8a9c4ab3d69\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n    
        \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_8619ac7034eb43d0ba5a05f80f080786\",\n              \"IPY_MODEL_128e986f97fb4bdcb0c15b60499c35b2\",\n              \"IPY_MODEL_9d3f40e1e44e46439fc8c1247704772e\"\n            ],\n            \"layout\": \"IPY_MODEL_f0f7a5aa0cf04bdfaedd20e83919bd50\"\n          }\n        },\n        \"8619ac7034eb43d0ba5a05f80f080786\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_220120f060f348b4ae9ff2beba7e5883\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_8559ddfaaaf140bd8b581cc7c3ed992b\",\n            \"value\": \"Downloading (…)l-00002-of-00046.bin: 100%\"\n          }\n        },\n        \"128e986f97fb4bdcb0c15b60499c35b2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n  
          \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_ea5fb9415a3b40ae9f51354a1e2c37bb\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_e5127004f16d436cb7aa3f37fb4881f0\",\n            \"value\": 910328184\n          }\n        },\n        \"9d3f40e1e44e46439fc8c1247704772e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_eb322e7aa135490c9da4c0c56a77e087\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_d30f71b6100d4227af6fc1b21eacd5fe\",\n            \"value\": \" 910M/910M [00:07&lt;00:00, 51.3MB/s]\"\n          }\n        },\n        \"f0f7a5aa0cf04bdfaedd20e83919bd50\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n       
     \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"220120f060f348b4ae9ff2beba7e5883\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            
\"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"8559ddfaaaf140bd8b581cc7c3ed992b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"ea5fb9415a3b40ae9f51354a1e2c37bb\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n       
   \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"e5127004f16d436cb7aa3f37fb4881f0\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": 
\"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"eb322e7aa135490c9da4c0c56a77e087\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            
\"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"d30f71b6100d4227af6fc1b21eacd5fe\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"d36a9cd4315147329650fad25e46d671\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_55b6e0c2dbc3409e82e79427d639811f\",\n              \"IPY_MODEL_174164ea94fa4188a5b2daa25533a006\",\n              \"IPY_MODEL_243bcbec761c40c7906826ffdd9435fa\"\n            ],\n            \"layout\": 
\"IPY_MODEL_f734f41601204063aa733845a3b95d17\"\n          }\n        },\n        \"55b6e0c2dbc3409e82e79427d639811f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_567ad6f6c2e3403285dd164db46659d0\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_cc1c1c1104644aa381ce702aa24105b9\",\n            \"value\": \"Downloading (…)l-00003-of-00046.bin: 100%\"\n          }\n        },\n        \"174164ea94fa4188a5b2daa25533a006\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_be1890dbda57429cb2738cea2183555c\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": 
\"IPY_MODEL_678a8fc1687749209ea33c4a1026a23a\",\n            \"value\": 910328184\n          }\n        },\n        \"243bcbec761c40c7906826ffdd9435fa\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_58db995285574441871ce58baedb5ab8\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_a5a438b2981a4b2d8405b9614f889193\",\n            \"value\": \" 910M/910M [00:06&lt;00:00, 228MB/s]\"\n          }\n        },\n        \"f734f41601204063aa733845a3b95d17\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            
\"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"567ad6f6c2e3403285dd164db46659d0\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            
\"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"cc1c1c1104644aa381ce702aa24105b9\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"be1890dbda57429cb2738cea2183555c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n         
   \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"678a8fc1687749209ea33c4a1026a23a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"58db995285574441871ce58baedb5ab8\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            
\"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"a5a438b2981a4b2d8405b9614f889193\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"82cd7bf0c798403c8cfcb32afb6cd0ae\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_e6f797672bea411fb4c4ca326c7edac8\",\n              \"IPY_MODEL_86210c9300df4d8a90f06d5e6dd9507b\",\n              \"IPY_MODEL_d56fb3801e574fe5825f946a06ce4f9c\"\n            ],\n            \"layout\": \"IPY_MODEL_83e5f8d407754c80873b032d2a57462b\"\n          }\n        },\n        \"e6f797672bea411fb4c4ca326c7edac8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_f30db7c8e3dc4aa08f097b4b6e2bda68\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_3b8c1ba088d84e918d5f39ac1dce6bf0\",\n            \"value\": \"Downloading (…)l-00004-of-00046.bin: 100%\"\n          }\n        },\n        \"86210c9300df4d8a90f06d5e6dd9507b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_352ce44060c54cdc99d2e5f199ab784b\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_06b274ac8797407483e6f1828dc059c5\",\n            \"value\": 910328184\n          }\n        },\n        \"d56fb3801e574fe5825f946a06ce4f9c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            
\"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_ab515be396d54273a28e80f3251e80d9\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_c51ab3d1855e47d9b21ea3d8efd47d90\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 249MB/s]\"\n          }\n        },\n        \"83e5f8d407754c80873b032d2a57462b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            
\"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"f30db7c8e3dc4aa08f097b4b6e2bda68\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            
\"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"3b8c1ba088d84e918d5f39ac1dce6bf0\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"352ce44060c54cdc99d2e5f199ab784b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": 
null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"06b274ac8797407483e6f1828dc059c5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"ab515be396d54273a28e80f3251e80d9\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          
\"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"c51ab3d1855e47d9b21ea3d8efd47d90\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n        
    \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"32e975af7d5e47c38cdb539fbf64daa6\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_447d1f1c84244df380ca5dcdecc6a131\",\n              \"IPY_MODEL_c5b9cb57416b41e8ba8947a4791e00cf\",\n              \"IPY_MODEL_73ebd68bc7294d10ad7cac722f8230b8\"\n            ],\n            \"layout\": \"IPY_MODEL_14fb72ac2ec64dc2bdd1481765e8f426\"\n          }\n        },\n        \"447d1f1c84244df380ca5dcdecc6a131\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n   
         \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_e2c4a11b7b3f4239b776c0b9091567e0\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_488cbe1b8cd94a5b831ec25fab67c68a\",\n            \"value\": \"Downloading (…)l-00005-of-00046.bin: 100%\"\n          }\n        },\n        \"c5b9cb57416b41e8ba8947a4791e00cf\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_bf82f6397c954c87bb004dca5a941d8a\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_c1fbab7d64b745518eabc5294ed65ef8\",\n            \"value\": 910328184\n          }\n        },\n        \"73ebd68bc7294d10ad7cac722f8230b8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            
\"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_eec2597f1d79458bbb94fcb6ecbc673b\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_2a9908e7feeb447693f190da28b0bd9b\",\n            \"value\": \" 910M/910M [00:05&lt;00:00, 200MB/s]\"\n          }\n        },\n        \"14fb72ac2ec64dc2bdd1481765e8f426\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            
\"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"e2c4a11b7b3f4239b776c0b9091567e0\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": 
null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"488cbe1b8cd94a5b831ec25fab67c68a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"bf82f6397c954c87bb004dca5a941d8a\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            
\"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"c1fbab7d64b745518eabc5294ed65ef8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"eec2597f1d79458bbb94fcb6ecbc673b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"2a9908e7feeb447693f190da28b0bd9b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            
\"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"23986ef9fd874b10bb036bfd112f78ae\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_b1b481d7010b4452b280b9880919cd80\",\n              \"IPY_MODEL_a8509d4279d74b92a03f7925e98dba2a\",\n              \"IPY_MODEL_47650d294e214c42993a3027ac819d8f\"\n            ],\n            \"layout\": \"IPY_MODEL_31cd2c4b1f4a4a6fa287151c16fb9349\"\n          }\n        },\n        \"b1b481d7010b4452b280b9880919cd80\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_7d587694fe524e9f8b77567e0c931bc1\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_ca59d3a8b8184bdfb7259d35d9b71930\",\n            \"value\": \"Downloading 
(…)l-00006-of-00046.bin: 100%\"\n          }\n        },\n        \"a8509d4279d74b92a03f7925e98dba2a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_e404b962e80d43bf81a327a43f82c7bd\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_6f251c68c4564c59aefc458c64fce56a\",\n            \"value\": 910328184\n          }\n        },\n        \"47650d294e214c42993a3027ac819d8f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_a5e06fc3e2e943fea0afd92e1c587c33\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_bd429abc607b4103b688f5b7d581d1b2\",\n            
\"value\": \" 910M/910M [00:03&lt;00:00, 246MB/s]\"\n          }\n        },\n        \"31cd2c4b1f4a4a6fa287151c16fb9349\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n      
  \"7d587694fe524e9f8b77567e0c931bc1\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"ca59d3a8b8184bdfb7259d35d9b71930\": {\n          \"model_module\": 
\"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"e404b962e80d43bf81a327a43f82c7bd\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            
\"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"6f251c68c4564c59aefc458c64fce56a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"a5e06fc3e2e943fea0afd92e1c587c33\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": 
null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"bd429abc607b4103b688f5b7d581d1b2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"8f6c5dd15d8949619fc9b2092d8ca946\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          
\"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_5798a31bd44348d89d6379298e6df154\",\n              \"IPY_MODEL_6e4430f78d35481fb307936e75d77216\",\n              \"IPY_MODEL_01572529c80b4488b23ffd03a376338c\"\n            ],\n            \"layout\": \"IPY_MODEL_0ae2cfed5fdd4d5896dff1167ddef45d\"\n          }\n        },\n        \"5798a31bd44348d89d6379298e6df154\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_422f825587034ebc8aed3fce5585f412\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_0937591b34e740838cd8208979a72e32\",\n            \"value\": \"Downloading (…)l-00007-of-00046.bin: 100%\"\n          }\n        },\n        \"6e4430f78d35481fb307936e75d77216\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_ab36702aaef44cb2ad970cfc9a00503e\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_c5f282668f8349e8901d22400cdc0103\",\n            \"value\": 910328184\n          }\n        },\n        \"01572529c80b4488b23ffd03a376338c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_1b561afcdee044d6b13e0266efd4b482\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_a46412f1a5ea44ee90483d1527f43cfe\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 255MB/s]\"\n          }\n        },\n        \"0ae2cfed5fdd4d5896dff1167ddef45d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": 
\"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"422f825587034ebc8aed3fce5585f412\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n     
       \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"0937591b34e740838cd8208979a72e32\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            
\"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"ab36702aaef44cb2ad970cfc9a00503e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            
\"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"c5f282668f8349e8901d22400cdc0103\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"1b561afcdee044d6b13e0266efd4b482\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            
\"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"a46412f1a5ea44ee90483d1527f43cfe\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"6c30ed71c9f74c1a8a52ae4db40b2e67\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n    
        \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_3d4ca54f279544ffbdcb3f649e42c443\",\n              \"IPY_MODEL_020a32c6a9bf4b979242c3129c2981b1\",\n              \"IPY_MODEL_13143288627d438c9f9b451645fd80d0\"\n            ],\n            \"layout\": \"IPY_MODEL_ebea7e0a5a124933adde20b3b0112c8a\"\n          }\n        },\n        \"3d4ca54f279544ffbdcb3f649e42c443\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_5307f143b29d451887f51ab83d4760ed\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_ed3fd7343b514183862d96c2f162acf2\",\n            \"value\": \"Downloading (…)l-00008-of-00046.bin: 100%\"\n          }\n        },\n        \"020a32c6a9bf4b979242c3129c2981b1\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n  
          \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_658875ca73d442a89d95e0c342e0165a\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_edc21d10e8ce4f5bbb1903e52b10db99\",\n            \"value\": 910328184\n          }\n        },\n        \"13143288627d438c9f9b451645fd80d0\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_5aabc69aac1443468bf163d42b258888\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_5e678d94d0c24e779f8595ebbfe2a4b9\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 220MB/s]\"\n          }\n        },\n        \"ebea7e0a5a124933adde20b3b0112c8a\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n        
    \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"5307f143b29d451887f51ab83d4760ed\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            
\"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"ed3fd7343b514183862d96c2f162acf2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"658875ca73d442a89d95e0c342e0165a\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n       
   \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"edc21d10e8ce4f5bbb1903e52b10db99\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": 
\"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"5aabc69aac1443468bf163d42b258888\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            
\"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"5e678d94d0c24e779f8595ebbfe2a4b9\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"cd4fa09afe2947f9b56c871f5ab7674a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_c1f37b729da34e5195da3daecd558183\",\n              \"IPY_MODEL_2794d7e2ef1543d896339f3df453c693\",\n              \"IPY_MODEL_313f047cd9a94d628562f71d1b50ade2\"\n            ],\n            \"layout\": 
\"IPY_MODEL_c235550064204eeeb399e5f5785bd89e\"\n          }\n        },\n        \"c1f37b729da34e5195da3daecd558183\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_1ce4fe592a8943d582dab3a429fee0ac\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_e2eb46ae412d42618be3e983e7774a6f\",\n            \"value\": \"Downloading (…)l-00009-of-00046.bin: 100%\"\n          }\n        },\n        \"2794d7e2ef1543d896339f3df453c693\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_0d8b16cb262743c38aebfaf59d76ad72\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": 
\"IPY_MODEL_53bc1c3d484c4ef7a0402e871709467c\",\n            \"value\": 910328184\n          }\n        },\n        \"313f047cd9a94d628562f71d1b50ade2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_ef0f33fefcbd416dbe7ec1d66747b277\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_61d3c77502ba484d988909392a1b0cde\",\n            \"value\": \" 910M/910M [00:05&lt;00:00, 266MB/s]\"\n          }\n        },\n        \"c235550064204eeeb399e5f5785bd89e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            
\"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"1ce4fe592a8943d582dab3a429fee0ac\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            
\"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"e2eb46ae412d42618be3e983e7774a6f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"0d8b16cb262743c38aebfaf59d76ad72\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n         
   \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"53bc1c3d484c4ef7a0402e871709467c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"ef0f33fefcbd416dbe7ec1d66747b277\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            
\"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"61d3c77502ba484d988909392a1b0cde\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"ce622a736e674dd8bbb44bb3770edeec\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_11a304faca7c485f872cd2960cb08a70\",\n              \"IPY_MODEL_5f41dd1d1f0d4993a7a7546fca2c5fa5\",\n              \"IPY_MODEL_44ef7387ed4b431994877f3bf685bae9\"\n            ],\n            \"layout\": \"IPY_MODEL_9afe2fe999934ea88c3e9f818c12b736\"\n          }\n        },\n        \"11a304faca7c485f872cd2960cb08a70\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_b1c4b8a4449846fb8c973c50f9135c39\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_7a6019f79e014d3f9aad568657f57ab7\",\n            \"value\": \"Downloading (…)l-00010-of-00046.bin: 100%\"\n          }\n        },\n        \"5f41dd1d1f0d4993a7a7546fca2c5fa5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_1353871fbe8248b5baf2064c6c9a8523\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_c2abddd737ea45ee83fcaa7da402ba64\",\n            \"value\": 910328184\n          }\n        },\n        \"44ef7387ed4b431994877f3bf685bae9\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            
\"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_4ddf5968860944f9a98cb86c87e50475\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_11694f04f8d54aabbe88a5191d6ed7a1\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 218MB/s]\"\n          }\n        },\n        \"9afe2fe999934ea88c3e9f818c12b736\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            
\"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"b1c4b8a4449846fb8c973c50f9135c39\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            
\"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"7a6019f79e014d3f9aad568657f57ab7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"1353871fbe8248b5baf2064c6c9a8523\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": 
null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"c2abddd737ea45ee83fcaa7da402ba64\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"4ddf5968860944f9a98cb86c87e50475\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          
\"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"11694f04f8d54aabbe88a5191d6ed7a1\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n        
    \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"809aa3334f9a4f3ebaac51fa1e3c6a71\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_8dfe08e9fe9b4f8db6af7a0daaab34d5\",\n              \"IPY_MODEL_0ee484c157854d51a56a23fedf0e8c01\",\n              \"IPY_MODEL_63b801bdfdbc4039b60ae3eb70ea7878\"\n            ],\n            \"layout\": \"IPY_MODEL_52e71e07f9ab499680b39e651c01299e\"\n          }\n        },\n        \"8dfe08e9fe9b4f8db6af7a0daaab34d5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n   
         \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_b690c09f1f354ed2b9a92ec9e924aeef\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_1be403b93c304f93806fc197b9f213bf\",\n            \"value\": \"Downloading (…)l-00011-of-00046.bin: 100%\"\n          }\n        },\n        \"0ee484c157854d51a56a23fedf0e8c01\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_c033a3fc865c465b91ca59b731317e6f\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_c8f72bf608ec46e891f5bb577cb35c04\",\n            \"value\": 910328184\n          }\n        },\n        \"63b801bdfdbc4039b60ae3eb70ea7878\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            
\"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_b759a9dba3c54819be1bc3a3dfcbfb36\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_709f5a80e82d4509abccac058187969e\",\n            \"value\": \" 910M/910M [00:14&lt;00:00, 150MB/s]\"\n          }\n        },\n        \"52e71e07f9ab499680b39e651c01299e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            
\"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"b690c09f1f354ed2b9a92ec9e924aeef\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": 
null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"1be403b93c304f93806fc197b9f213bf\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"c033a3fc865c465b91ca59b731317e6f\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            
\"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"c8f72bf608ec46e891f5bb577cb35c04\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"b759a9dba3c54819be1bc3a3dfcbfb36\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"709f5a80e82d4509abccac058187969e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            
\"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"eead49138651461b99b62a5f2d9a58e7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_5329b729ec8341c393943bee5b8a6c59\",\n              \"IPY_MODEL_7777b18fdd9a45469df47fcd8bf5752e\",\n              \"IPY_MODEL_0a1ad847df9548128812430ac683c948\"\n            ],\n            \"layout\": \"IPY_MODEL_c0957f187b654dfbb6606e777eedbbaf\"\n          }\n        },\n        \"5329b729ec8341c393943bee5b8a6c59\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_f3e5142b4e8f493684ad9c25af192925\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_d9540d78686741e0ab500c7b7ff13e44\",\n            \"value\": \"Downloading 
(…)l-00012-of-00046.bin: 100%\"\n          }\n        },\n        \"7777b18fdd9a45469df47fcd8bf5752e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_baed6664b673437a8934f51e5ef94cad\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_55110aebdaa0456f962b97de322088af\",\n            \"value\": 910328184\n          }\n        },\n        \"0a1ad847df9548128812430ac683c948\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_746e1b660217449b9d79719b3b982ef5\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_1a93d95c6b6447c186cf30afa84deea7\",\n            
\"value\": \" 910M/910M [00:03&lt;00:00, 233MB/s]\"\n          }\n        },\n        \"c0957f187b654dfbb6606e777eedbbaf\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n      
  \"f3e5142b4e8f493684ad9c25af192925\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"d9540d78686741e0ab500c7b7ff13e44\": {\n          \"model_module\": 
\"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"baed6664b673437a8934f51e5ef94cad\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            
\"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"55110aebdaa0456f962b97de322088af\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"746e1b660217449b9d79719b3b982ef5\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": 
null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"1a93d95c6b6447c186cf30afa84deea7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"71da713626974bd8b960702e0552a088\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          
\"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_b6f4796f219b4f4ea1a048fd6e0eb1c8\",\n              \"IPY_MODEL_be54b5fb20014aa79f365b56938dda29\",\n              \"IPY_MODEL_d67c72b21291410a88d3921ca8727f50\"\n            ],\n            \"layout\": \"IPY_MODEL_004efb3a49ea4f6489d364fd791ca61c\"\n          }\n        },\n        \"b6f4796f219b4f4ea1a048fd6e0eb1c8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_607555489e2a449283423e9b092a3967\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_da4c8b8065f040d89f016dcae293e5e8\",\n            \"value\": \"Downloading (…)l-00013-of-00046.bin: 100%\"\n          }\n        },\n        \"be54b5fb20014aa79f365b56938dda29\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_c33226c7265e407aad5b3acd4098ae42\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_755655b98aae4792879fb5fe8c76632d\",\n            \"value\": 910328184\n          }\n        },\n        \"d67c72b21291410a88d3921ca8727f50\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_7d7cde351a464875916d936998420a15\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_d03638361cad4b98960d9b28b956381a\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 299MB/s]\"\n          }\n        },\n        \"004efb3a49ea4f6489d364fd791ca61c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": 
\"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"607555489e2a449283423e9b092a3967\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n     
       \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"da4c8b8065f040d89f016dcae293e5e8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            
\"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"c33226c7265e407aad5b3acd4098ae42\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            
\"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"755655b98aae4792879fb5fe8c76632d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"7d7cde351a464875916d936998420a15\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            
\"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"d03638361cad4b98960d9b28b956381a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"59caf02d181341ea81b1e0d7f3f63fab\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n    
        \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_1086211808ff4b88bfbec7bb831696af\",\n              \"IPY_MODEL_84d00f6bce284d2dbcb8ffcb17aba143\",\n              \"IPY_MODEL_2fd9fb963f5f47d1acfcc75e5c53d97c\"\n            ],\n            \"layout\": \"IPY_MODEL_7089e6d90c2342008d6a4a7455ffd156\"\n          }\n        },\n        \"1086211808ff4b88bfbec7bb831696af\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_7179091d1a3a41f1842cb20f9e2b0063\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_c8a1c8ff762a4a41bf700a777cee83d7\",\n            \"value\": \"Downloading (…)l-00014-of-00046.bin: 100%\"\n          }\n        },\n        \"84d00f6bce284d2dbcb8ffcb17aba143\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n  
          \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_788c219f0bd44a2ea9bf4b5c34fc9e8c\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_0b240c99432940bfb405b267f04c471f\",\n            \"value\": 910328184\n          }\n        },\n        \"2fd9fb963f5f47d1acfcc75e5c53d97c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_265a26354cbf411b816f49a1d4f1a16f\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_88a71774ab1b408fbdd0cc80815de782\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 291MB/s]\"\n          }\n        },\n        \"7089e6d90c2342008d6a4a7455ffd156\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n        
    \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"7179091d1a3a41f1842cb20f9e2b0063\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            
\"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"c8a1c8ff762a4a41bf700a777cee83d7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"788c219f0bd44a2ea9bf4b5c34fc9e8c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n       
   \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"0b240c99432940bfb405b267f04c471f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": 
\"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"265a26354cbf411b816f49a1d4f1a16f\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            
\"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"88a71774ab1b408fbdd0cc80815de782\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"6524a46e508d4820a0d3d8a7d2231301\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_1c4fb40f1e4d4689a4f56845ca76c083\",\n              \"IPY_MODEL_1c4e665173a64b60b16b325238617bfe\",\n              \"IPY_MODEL_01177211c6e942bb99910e3a4450b447\"\n            ],\n            \"layout\": 
\"IPY_MODEL_d9b81389512248eda61573b451e18c96\"\n          }\n        },\n        \"1c4fb40f1e4d4689a4f56845ca76c083\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_864877c73f13402c82a76b089090ff1a\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_60a24e88c8014e28bfc75db147f8d7cd\",\n            \"value\": \"Downloading (…)l-00015-of-00046.bin: 100%\"\n          }\n        },\n        \"1c4e665173a64b60b16b325238617bfe\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_6ab3a5774e7b45dfb178b61f8f7971b6\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": 
\"IPY_MODEL_5a28c8947e7347e4a77e4cd105f3821f\",\n            \"value\": 910328184\n          }\n        },\n        \"01177211c6e942bb99910e3a4450b447\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_66b9917a04cd414b90ad732da43d2ebf\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_3a92e30dd87048849d978024d17adfae\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 227MB/s]\"\n          }\n        },\n        \"d9b81389512248eda61573b451e18c96\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            
\"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"864877c73f13402c82a76b089090ff1a\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            
\"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"60a24e88c8014e28bfc75db147f8d7cd\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"6ab3a5774e7b45dfb178b61f8f7971b6\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n         
   \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"5a28c8947e7347e4a77e4cd105f3821f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"66b9917a04cd414b90ad732da43d2ebf\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            
\"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"3a92e30dd87048849d978024d17adfae\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"b64cbb7874a94dc8811fde3ce5064823\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_3abe17ab46ab454190e0ab5009581190\",\n              \"IPY_MODEL_9ee1f5d6bcba4a22a4646c769f26f2ae\",\n              \"IPY_MODEL_2b44a7cc5d8d4c509dd9179960f0d4fb\"\n            ],\n            \"layout\": \"IPY_MODEL_37d3b49f317343899d0a001b708c8289\"\n          }\n        },\n        \"3abe17ab46ab454190e0ab5009581190\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_bd2c8ad252f04847b3bcb7948d4adb0d\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_d63b16952fbc49d3b1c0fe232863c7d7\",\n            \"value\": \"Downloading (…)l-00016-of-00046.bin: 100%\"\n          }\n        },\n        \"9ee1f5d6bcba4a22a4646c769f26f2ae\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_a018be650f764b9eb0157bfdeb46f525\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_ae2d8a80559546c9873bc14ab9a82f3b\",\n            \"value\": 910328184\n          }\n        },\n        \"2b44a7cc5d8d4c509dd9179960f0d4fb\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            
\"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_817b79fe562b4d1787cbcd81dcb71b8b\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_4213af025f004d17b900e22da2f502a2\",\n            \"value\": \" 910M/910M [00:11&lt;00:00, 195MB/s]\"\n          }\n        },\n        \"37d3b49f317343899d0a001b708c8289\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            
\"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"bd2c8ad252f04847b3bcb7948d4adb0d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            
\"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"d63b16952fbc49d3b1c0fe232863c7d7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"a018be650f764b9eb0157bfdeb46f525\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": 
null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"ae2d8a80559546c9873bc14ab9a82f3b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"817b79fe562b4d1787cbcd81dcb71b8b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          
\"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"4213af025f004d17b900e22da2f502a2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n        
    \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"717ac9d4a7094de0aa740e9dae45c972\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_51495d20a38044d3aec0e5c1010d716f\",\n              \"IPY_MODEL_6f7da3f317d842cc9a9edc6417a440cc\",\n              \"IPY_MODEL_30dc330e207c4186857d9665a1ed76f8\"\n            ],\n            \"layout\": \"IPY_MODEL_5cf1795eb29944c08f36c9c606e67977\"\n          }\n        },\n        \"51495d20a38044d3aec0e5c1010d716f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n   
         \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_bb53ce4664e84e8dbce58cc5316cf3b4\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_2cce29639a2a431082bb1ae161568abb\",\n            \"value\": \"Downloading (…)l-00017-of-00046.bin: 100%\"\n          }\n        },\n        \"6f7da3f317d842cc9a9edc6417a440cc\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_f485d9d1c2464655883739e79e5b8ae9\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_c3b4976a88434ec4a18b5fc44b7f86c9\",\n            \"value\": 910328184\n          }\n        },\n        \"30dc330e207c4186857d9665a1ed76f8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            
\"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_96eceb46444842a9b43069ef22844828\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_3d99b78943fa4242963eda194503a421\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 227MB/s]\"\n          }\n        },\n        \"5cf1795eb29944c08f36c9c606e67977\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            
\"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"bb53ce4664e84e8dbce58cc5316cf3b4\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": 
null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"2cce29639a2a431082bb1ae161568abb\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"f485d9d1c2464655883739e79e5b8ae9\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            
\"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"c3b4976a88434ec4a18b5fc44b7f86c9\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"96eceb46444842a9b43069ef22844828\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"3d99b78943fa4242963eda194503a421\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            
\"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"33ce9e2a338a48ad8f4b607e23c3ba23\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_ab81662b419742dbb262346d48fb8a45\",\n              \"IPY_MODEL_3624a588cd2d4a67a7e725b1a76a0377\",\n              \"IPY_MODEL_951e087f4970473f99e8eb0d795f5807\"\n            ],\n            \"layout\": \"IPY_MODEL_cfe68ac339a44c2da96f59660ddda2a6\"\n          }\n        },\n        \"ab81662b419742dbb262346d48fb8a45\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_4cb06609ca684cd688d40ec30c5d7efa\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_844a92d2c77f4a588d2ff19ec0bd0574\",\n            \"value\": \"Downloading 
(…)l-00018-of-00046.bin: 100%\"\n          }\n        },\n        \"3624a588cd2d4a67a7e725b1a76a0377\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_b50f27ca4b4647f89ebfabb783d311d9\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_97e506e771a944e38ae6e847bf2c80a2\",\n            \"value\": 910328184\n          }\n        },\n        \"951e087f4970473f99e8eb0d795f5807\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_c4cd9b2090a84ca797adc81e6b155201\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_d36db0d2c3f1436c80a705c2573a884c\",\n            
\"value\": \" 910M/910M [00:06&lt;00:00, 283MB/s]\"\n          }\n        },\n        \"cfe68ac339a44c2da96f59660ddda2a6\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n      
  \"4cb06609ca684cd688d40ec30c5d7efa\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"844a92d2c77f4a588d2ff19ec0bd0574\": {\n          \"model_module\": 
\"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"b50f27ca4b4647f89ebfabb783d311d9\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            
\"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"97e506e771a944e38ae6e847bf2c80a2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"c4cd9b2090a84ca797adc81e6b155201\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": 
null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"d36db0d2c3f1436c80a705c2573a884c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"33eed28c85dc4fff8ed3ca549a72b250\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          
\"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_f4a50369c17d41ab8ee65836e9f409cc\",\n              \"IPY_MODEL_e689784e942341b799809d663277e3a6\",\n              \"IPY_MODEL_25803c5e86a8436caa0a3b7341d6b24f\"\n            ],\n            \"layout\": \"IPY_MODEL_c81db61b545d49979d8df3b35f2d2337\"\n          }\n        },\n        \"f4a50369c17d41ab8ee65836e9f409cc\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_988bf6ea04e24f419e2c55dac25c7b92\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_ab0b1734f4fa4cb8be00a3fd2c422023\",\n            \"value\": \"Downloading (…)l-00019-of-00046.bin: 100%\"\n          }\n        },\n        \"e689784e942341b799809d663277e3a6\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_37574cd071fe49018b9cd729410f78d6\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_404126188aa8471fbc975c1292c61177\",\n            \"value\": 910328184\n          }\n        },\n        \"25803c5e86a8436caa0a3b7341d6b24f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_964f9665564a405f8383779f462809a3\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_593fd457a0a647f1ad23772663455075\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 259MB/s]\"\n          }\n        },\n        \"c81db61b545d49979d8df3b35f2d2337\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": 
\"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"988bf6ea04e24f419e2c55dac25c7b92\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n     
       \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"ab0b1734f4fa4cb8be00a3fd2c422023\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            
\"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"37574cd071fe49018b9cd729410f78d6\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            
\"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"404126188aa8471fbc975c1292c61177\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"964f9665564a405f8383779f462809a3\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            
\"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"593fd457a0a647f1ad23772663455075\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"1c562ef279d7496594b3b0f5e191b2ca\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n    
        \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_02af81a3890a4eb69376481dddb62757\",\n              \"IPY_MODEL_4dcdb8b843ff47c092ee7c812d4820e2\",\n              \"IPY_MODEL_5d5df9b5f06b4679818e5f2e3b69bb3a\"\n            ],\n            \"layout\": \"IPY_MODEL_9d7c0006eba24d48a9a79021a72cab14\"\n          }\n        },\n        \"02af81a3890a4eb69376481dddb62757\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_f5f7ca70572641ee9c3303ff17dc19cf\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_c4c4a8e92e41410bad01974c0516fd5a\",\n            \"value\": \"Downloading (…)l-00020-of-00046.bin: 100%\"\n          }\n        },\n        \"4dcdb8b843ff47c092ee7c812d4820e2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n  
          \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_21503fdc35034377b332127cff47d2cf\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_11b2774b38944055b16d24f47f715bf1\",\n            \"value\": 910328184\n          }\n        },\n        \"5d5df9b5f06b4679818e5f2e3b69bb3a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_55b72408c4714609ba5dad380c0107da\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_3b38453d82f94ba3bb2c952e4cf3203b\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 251MB/s]\"\n          }\n        },\n        \"9d7c0006eba24d48a9a79021a72cab14\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n        
    \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"f5f7ca70572641ee9c3303ff17dc19cf\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            
\"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"c4c4a8e92e41410bad01974c0516fd5a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"21503fdc35034377b332127cff47d2cf\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n       
   \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"11b2774b38944055b16d24f47f715bf1\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": 
\"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"55b72408c4714609ba5dad380c0107da\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            
\"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"3b38453d82f94ba3bb2c952e4cf3203b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"7152e9a878e94615b1a20f37f31ec8a3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_adcce8e6fcc74ccb9081c532dabf7954\",\n              \"IPY_MODEL_5b1166971deb41afa3f6710e22dda622\",\n              \"IPY_MODEL_c071eac4a95d42f48cc72ece1a2d3a2e\"\n            ],\n            \"layout\": 
\"IPY_MODEL_e2ea4c6c32b549fcb79c04b1576b6b6e\"\n          }\n        },\n        \"adcce8e6fcc74ccb9081c532dabf7954\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_9dce8e6885404bef8e13951ebaaf2810\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_35430f86dc0b4b98b0587ab5ffb3c445\",\n            \"value\": \"Downloading (…)l-00021-of-00046.bin: 100%\"\n          }\n        },\n        \"5b1166971deb41afa3f6710e22dda622\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_210fc3b7a05346e29d5b47b3bd62e96d\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": 
\"IPY_MODEL_b13a851953974020a7e78ebf80ec39f9\",\n            \"value\": 910328184\n          }\n        },\n        \"c071eac4a95d42f48cc72ece1a2d3a2e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_5f6043547d8b4e9790d614789c0bfc4d\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_0a0d5615ab01469c9bea66ed9409e855\",\n            \"value\": \" 910M/910M [00:11&lt;00:00, 72.4MB/s]\"\n          }\n        },\n        \"e2ea4c6c32b549fcb79c04b1576b6b6e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            
\"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"9dce8e6885404bef8e13951ebaaf2810\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            
\"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"35430f86dc0b4b98b0587ab5ffb3c445\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"210fc3b7a05346e29d5b47b3bd62e96d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n         
   \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"b13a851953974020a7e78ebf80ec39f9\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"5f6043547d8b4e9790d614789c0bfc4d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            
\"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"0a0d5615ab01469c9bea66ed9409e855\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"d7873eccfc1f4a5e934c8061168b7ff9\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_26016a9811154ebfa85d7537936c68e4\",\n              \"IPY_MODEL_52f118afe6a74797b89fc1aa868de358\",\n              \"IPY_MODEL_60591bf7bf9f4052a4a62c548cb0f7d3\"\n            ],\n            \"layout\": \"IPY_MODEL_a314e4f42e34465fa53e48c7fa46a859\"\n          }\n        },\n        \"26016a9811154ebfa85d7537936c68e4\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_6c2a653d96064901a94cb8705ef5ba67\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_0c637202da564f46ad98f8aa9c6e94b2\",\n            \"value\": \"Downloading (…)l-00022-of-00046.bin: 100%\"\n          }\n        },\n        \"52f118afe6a74797b89fc1aa868de358\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_aa836e70e66642d6bb1f7fbcf7482cf2\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_b14bcb5b3cd440f4955ab25fea1beef3\",\n            \"value\": 910328184\n          }\n        },\n        \"60591bf7bf9f4052a4a62c548cb0f7d3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            
\"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_6562fcd1e25b4522801f604ac8553148\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_fef01075c3e74c62bd92e8a668b88f90\",\n            \"value\": \" 910M/910M [00:05&lt;00:00, 294MB/s]\"\n          }\n        },\n        \"a314e4f42e34465fa53e48c7fa46a859\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            
\"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"6c2a653d96064901a94cb8705ef5ba67\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            
\"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"0c637202da564f46ad98f8aa9c6e94b2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"aa836e70e66642d6bb1f7fbcf7482cf2\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": 
null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"b14bcb5b3cd440f4955ab25fea1beef3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"6562fcd1e25b4522801f604ac8553148\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          
\"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"fef01075c3e74c62bd92e8a668b88f90\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n        
    \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"3c90ae0909df4199a1555fd7b8c9980c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_f5f1e45fac7445899e5678cea2ad0190\",\n              \"IPY_MODEL_752a96af39264fcda7f772bf13a386c0\",\n              \"IPY_MODEL_2c954ee5db5f4bad80dc54ca8f3fa634\"\n            ],\n            \"layout\": \"IPY_MODEL_e5a9f048600549f886af24bfc7df7ad8\"\n          }\n        },\n        \"f5f1e45fac7445899e5678cea2ad0190\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n   
         \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_971c54165ca3468cbdd0f0f5735cb690\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_f8c9ae1c8056424a9582c3c7b5528bcc\",\n            \"value\": \"Downloading (…)l-00023-of-00046.bin: 100%\"\n          }\n        },\n        \"752a96af39264fcda7f772bf13a386c0\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_b4342b8efe7442478188cfeca4e65b80\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_793715ef171e4a91bb3607a0b7b00143\",\n            \"value\": 910328184\n          }\n        },\n        \"2c954ee5db5f4bad80dc54ca8f3fa634\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            
\"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_858c83c823cc41be9013e760012cb676\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_03659d1840034d76845f786268a2aef3\",\n            \"value\": \" 910M/910M [00:07&lt;00:00, 65.3MB/s]\"\n          }\n        },\n        \"e5a9f048600549f886af24bfc7df7ad8\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            
\"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"971c54165ca3468cbdd0f0f5735cb690\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": 
null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"f8c9ae1c8056424a9582c3c7b5528bcc\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"b4342b8efe7442478188cfeca4e65b80\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            
\"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"793715ef171e4a91bb3607a0b7b00143\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"858c83c823cc41be9013e760012cb676\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"03659d1840034d76845f786268a2aef3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            
\"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"1ba58ae904fc48079e86646936816330\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_b48770c25a274ce4aeeb48616c29c53e\",\n              \"IPY_MODEL_1a9840e017a74ea3bd556806fb1bde61\",\n              \"IPY_MODEL_b95d0160c569437d9f3ffdcd0d83fce5\"\n            ],\n            \"layout\": \"IPY_MODEL_2f980bda91d942ea8972ca21c7998103\"\n          }\n        },\n        \"b48770c25a274ce4aeeb48616c29c53e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_0553b6dc9a09484ea0cc61cb70649748\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_4b48da34bde344ea93bc717e62b4bbb8\",\n            \"value\": \"Downloading 
(…)l-00024-of-00046.bin: 100%\"\n          }\n        },\n        \"1a9840e017a74ea3bd556806fb1bde61\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_3f4c9e0fc4ed4c24821fa24e61a98b9e\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_675cae1fc5f942d68f1704785f805831\",\n            \"value\": 910328184\n          }\n        },\n        \"b95d0160c569437d9f3ffdcd0d83fce5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_436a3cd81bf844f89d10a861ad763192\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_7fdce14fef024cc190e5a8c8d5e73c7c\",\n            
\"value\": \" 910M/910M [00:03&lt;00:00, 251MB/s]\"\n          }\n        },\n        \"2f980bda91d942ea8972ca21c7998103\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n      
  \"0553b6dc9a09484ea0cc61cb70649748\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"4b48da34bde344ea93bc717e62b4bbb8\": {\n          \"model_module\": 
\"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"3f4c9e0fc4ed4c24821fa24e61a98b9e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            
\"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"675cae1fc5f942d68f1704785f805831\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"436a3cd81bf844f89d10a861ad763192\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": 
null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"7fdce14fef024cc190e5a8c8d5e73c7c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"b03e33cba1a542758eb0ca4ad3daf3c2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          
\"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_1b3188144e7e435a9dd6317915bb8d74\",\n              \"IPY_MODEL_bd9ac4d8be254686a25a796534e30270\",\n              \"IPY_MODEL_e241026e90734fab9fdedb5357a8153f\"\n            ],\n            \"layout\": \"IPY_MODEL_88d5307a7bc04acda9ab001d7d6ab70a\"\n          }\n        },\n        \"1b3188144e7e435a9dd6317915bb8d74\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_8e85eb7198fa44fdaf7401ec5a4b015a\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_41da66fa34144359a7049ca48632af21\",\n            \"value\": \"Downloading (…)l-00025-of-00046.bin: 100%\"\n          }\n        },\n        \"bd9ac4d8be254686a25a796534e30270\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_ae3961cd33e544c1845602589d41d0e7\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_98184a0c8eef4d5fb3665a9a07441dc6\",\n            \"value\": 910328184\n          }\n        },\n        \"e241026e90734fab9fdedb5357a8153f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_e331cb31fed345b5bcc3151086bc051c\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_1645c00f9d1d44f6b3b590c53cfe7b97\",\n            \"value\": \" 910M/910M [00:07&lt;00:00, 219MB/s]\"\n          }\n        },\n        \"88d5307a7bc04acda9ab001d7d6ab70a\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": 
\"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"8e85eb7198fa44fdaf7401ec5a4b015a\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n     
       \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"41da66fa34144359a7049ca48632af21\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            
\"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"ae3961cd33e544c1845602589d41d0e7\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            
\"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"98184a0c8eef4d5fb3665a9a07441dc6\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"e331cb31fed345b5bcc3151086bc051c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            
\"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"1645c00f9d1d44f6b3b590c53cfe7b97\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"841a63cb8294465d974f241c7e966d59\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n    
        \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_f98778ffd4874d37a72db041a751de17\",\n              \"IPY_MODEL_224069f59623449fbb0e224837522d2c\",\n              \"IPY_MODEL_77c2673577a34c3387f8dbbcafc99fae\"\n            ],\n            \"layout\": \"IPY_MODEL_855cf3add41741868e713d0302acf560\"\n          }\n        },\n        \"f98778ffd4874d37a72db041a751de17\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_ffbd499631cd4b05b5aef6c0e7321cdb\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_550a5e9cf8be48f58205e8e8c7c0ae56\",\n            \"value\": \"Downloading (…)l-00026-of-00046.bin: 100%\"\n          }\n        },\n        \"224069f59623449fbb0e224837522d2c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n  
          \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_89538980a0f7469ebba49444fb8da63a\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_89ee0bd3c1e24b6d8863abe82a1ebc58\",\n            \"value\": 910328184\n          }\n        },\n        \"77c2673577a34c3387f8dbbcafc99fae\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_bcff745430fb41839e40f650e7b14cba\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_6a54953f28a54ae18c0d4aaa5cb0387e\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 257MB/s]\"\n          }\n        },\n        \"855cf3add41741868e713d0302acf560\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n        
    \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"ffbd499631cd4b05b5aef6c0e7321cdb\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            
\"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"550a5e9cf8be48f58205e8e8c7c0ae56\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"89538980a0f7469ebba49444fb8da63a\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n       
   \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"89ee0bd3c1e24b6d8863abe82a1ebc58\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": 
\"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"bcff745430fb41839e40f650e7b14cba\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            
\"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"6a54953f28a54ae18c0d4aaa5cb0387e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"b96d793a9ca54cd190cafb8cf6e470f2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_91636dd7dca143c19b62297a2fa5ce00\",\n              \"IPY_MODEL_f33334ceba1c4246a189fbfd111f3068\",\n              \"IPY_MODEL_9900afd61ab641a895ffdc8a10e6496b\"\n            ],\n            \"layout\": 
\"IPY_MODEL_955b0e93751d48809ef95d419453914e\"\n          }\n        },\n        \"91636dd7dca143c19b62297a2fa5ce00\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_136982307cb04bbabfa01a4de219c62a\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_e99d1fcdc87b4f74a607cc98bca5a398\",\n            \"value\": \"Downloading (…)l-00027-of-00046.bin: 100%\"\n          }\n        },\n        \"f33334ceba1c4246a189fbfd111f3068\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_047daa8310e44e63b19aa564b2839837\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": 
\"IPY_MODEL_3412f43e49484b3986c88ac669be1901\",\n            \"value\": 910328184\n          }\n        },\n        \"9900afd61ab641a895ffdc8a10e6496b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_ff7aef963ee0491ab49fc4bd3e0976b5\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_882a1c11c929464aa93963025a5a00ce\",\n            \"value\": \" 910M/910M [00:13&lt;00:00, 104MB/s]\"\n          }\n        },\n        \"955b0e93751d48809ef95d419453914e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            
\"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"136982307cb04bbabfa01a4de219c62a\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            
\"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"e99d1fcdc87b4f74a607cc98bca5a398\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"047daa8310e44e63b19aa564b2839837\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n         
   \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"3412f43e49484b3986c88ac669be1901\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"ff7aef963ee0491ab49fc4bd3e0976b5\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            
\"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"882a1c11c929464aa93963025a5a00ce\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"b4b53b4f4fce468f98c3ac047385f78c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_9476364cea034e12ac3f29e6ca2ba557\",\n              \"IPY_MODEL_b420f4a0289345d98c491baf2990e5e9\",\n              \"IPY_MODEL_12ecb306485a4f7b85d62344460f8bfd\"\n            ],\n            \"layout\": \"IPY_MODEL_65546be37586428a824de17d1c038b25\"\n          }\n        },\n        \"9476364cea034e12ac3f29e6ca2ba557\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_6e05196caf734006ba4e3c6d7baa8450\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_cb56376b773047d8a7adf71f72c920ae\",\n            \"value\": \"Downloading (…)l-00028-of-00046.bin: 100%\"\n          }\n        },\n        \"b420f4a0289345d98c491baf2990e5e9\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_d73316ef8c484c2887cc8e307a1b570e\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_75fde083f37d4e318992d56d9778873b\",\n            \"value\": 910328184\n          }\n        },\n        \"12ecb306485a4f7b85d62344460f8bfd\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            
\"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_9ded2515a84d4ba78e44c1c2f5acc58d\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_ccf04eb1a35d4440b403faf00c7407f8\",\n            \"value\": \" 910M/910M [00:09&lt;00:00, 271MB/s]\"\n          }\n        },\n        \"65546be37586428a824de17d1c038b25\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            
\"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"6e05196caf734006ba4e3c6d7baa8450\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            
\"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"cb56376b773047d8a7adf71f72c920ae\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"d73316ef8c484c2887cc8e307a1b570e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": 
null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"75fde083f37d4e318992d56d9778873b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"9ded2515a84d4ba78e44c1c2f5acc58d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          
\"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"ccf04eb1a35d4440b403faf00c7407f8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n        
    \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"178feebf2c524f989e9943c4e30abcf7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_9498c899ff794d0da03970dd88bb442c\",\n              \"IPY_MODEL_08c234ba5a4541f88020dc79157560dd\",\n              \"IPY_MODEL_d2b8b267ed4b4ab0a8ed8199836a91f7\"\n            ],\n            \"layout\": \"IPY_MODEL_fa2de2fb3dde4f72ac3228bcdccd4e01\"\n          }\n        },\n        \"9498c899ff794d0da03970dd88bb442c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n   
         \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_4b420c08ff2a498681e8dd59f9f2ac2c\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_7cc4d367c20a4b04b54694dfcf170f74\",\n            \"value\": \"Downloading (…)l-00029-of-00046.bin: 100%\"\n          }\n        },\n        \"08c234ba5a4541f88020dc79157560dd\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_3bbf51b944d94f1bb6a9be1e0b226309\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_2a5437d5d93d49109fe74fd03c39409e\",\n            \"value\": 910328184\n          }\n        },\n        \"d2b8b267ed4b4ab0a8ed8199836a91f7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            
\"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_59a2f6ee62014f56b017bdd9f8f0b57f\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_5fb5c37cbdad458cb7c0d7ee2c9328ff\",\n            \"value\": \" 910M/910M [00:05&lt;00:00, 253MB/s]\"\n          }\n        },\n        \"fa2de2fb3dde4f72ac3228bcdccd4e01\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            
\"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"4b420c08ff2a498681e8dd59f9f2ac2c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": 
null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"7cc4d367c20a4b04b54694dfcf170f74\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"3bbf51b944d94f1bb6a9be1e0b226309\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            
\"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"2a5437d5d93d49109fe74fd03c39409e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"59a2f6ee62014f56b017bdd9f8f0b57f\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"5fb5c37cbdad458cb7c0d7ee2c9328ff\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            
\"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"92631b29e6854d708d702cfa0a18ac7e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_6806ee5ea0a94590923866fc5801e857\",\n              \"IPY_MODEL_682ebeeedfe84274b4af85ff1d2650e7\",\n              \"IPY_MODEL_4b4cff80d741452caca7a622f6d8880a\"\n            ],\n            \"layout\": \"IPY_MODEL_6efee891674a4493aeb4361d118f60bf\"\n          }\n        },\n        \"6806ee5ea0a94590923866fc5801e857\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_9157207c151a424cb78115c8549df716\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_0d92b2b2bda9459791212d5361b478ae\",\n            \"value\": \"Downloading 
(…)l-00030-of-00046.bin: 100%\"\n          }\n        },\n        \"682ebeeedfe84274b4af85ff1d2650e7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_e08595a84b36432d8bdd0fdfa9eae05c\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_f59ba68fd3264f94b198b94ae5da3463\",\n            \"value\": 910328184\n          }\n        },\n        \"4b4cff80d741452caca7a622f6d8880a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_5120030a5fb54687bd6e1ad4647682ad\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_ec3b956f469342f4838eee8b127a5eee\",\n            
\"value\": \" 910M/910M [00:03&lt;00:00, 242MB/s]\"\n          }\n        },\n        \"6efee891674a4493aeb4361d118f60bf\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n      
  \"9157207c151a424cb78115c8549df716\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"0d92b2b2bda9459791212d5361b478ae\": {\n          \"model_module\": 
\"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"e08595a84b36432d8bdd0fdfa9eae05c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            
\"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"f59ba68fd3264f94b198b94ae5da3463\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"5120030a5fb54687bd6e1ad4647682ad\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": 
null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"ec3b956f469342f4838eee8b127a5eee\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"75993d21834f4bfe88680cce6ef45920\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          
\"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_09d71d9e66344b3188f766bebb5c3181\",\n              \"IPY_MODEL_901f923c7eac4576b8ebfa37b228abe1\",\n              \"IPY_MODEL_c893830e66c64d639169f274f3a6e133\"\n            ],\n            \"layout\": \"IPY_MODEL_8dc64a19d272401c99a2a5cd03bcced8\"\n          }\n        },\n        \"09d71d9e66344b3188f766bebb5c3181\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_c51ea2b528d54086b8ba59f1c244b1b6\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_e4bb32bdee5442caa564425b9d722ab1\",\n            \"value\": \"Downloading (…)l-00031-of-00046.bin: 100%\"\n          }\n        },\n        \"901f923c7eac4576b8ebfa37b228abe1\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_88c728cdba7d45418b5508b5b017c119\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_b97d12a1ad1641f5b2ed3fe75d239de7\",\n            \"value\": 910328184\n          }\n        },\n        \"c893830e66c64d639169f274f3a6e133\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_507d78d4c25c4be39c7e8a849e21f637\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_762557b63ff8434482e3f10210a5cca2\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 201MB/s]\"\n          }\n        },\n        \"8dc64a19d272401c99a2a5cd03bcced8\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": 
\"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"c51ea2b528d54086b8ba59f1c244b1b6\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n     
       \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"e4bb32bdee5442caa564425b9d722ab1\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            
\"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"88c728cdba7d45418b5508b5b017c119\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            
\"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"b97d12a1ad1641f5b2ed3fe75d239de7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"507d78d4c25c4be39c7e8a849e21f637\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            
\"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"762557b63ff8434482e3f10210a5cca2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"0bd4726d64bb40cfa7de9a19a6e56c2b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n    
        \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_e26247ae7b8d468487248f2c8c3b1a42\",\n              \"IPY_MODEL_e6d8198c49824287933934dd1e7a2ad1\",\n              \"IPY_MODEL_98d5c0c40bbc4778b72b932d2b8962f2\"\n            ],\n            \"layout\": \"IPY_MODEL_1a17f81cf890444c8cf494b36bb215d9\"\n          }\n        },\n        \"e26247ae7b8d468487248f2c8c3b1a42\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_e0cf236c7703460389083200a41fd6ec\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_0a8295c3cadd45e2a93ac9b2741c07f1\",\n            \"value\": \"Downloading (…)l-00032-of-00046.bin: 100%\"\n          }\n        },\n        \"e6d8198c49824287933934dd1e7a2ad1\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n  
          \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_f521455989c14fa681987a9cf5b750e6\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_02b06c57c93c4e1891801bd89a712f94\",\n            \"value\": 910328184\n          }\n        },\n        \"98d5c0c40bbc4778b72b932d2b8962f2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_32277f46720b427291677a655535895a\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_fa79c90a3e154e62b05b771cf6b1c1dd\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 248MB/s]\"\n          }\n        },\n        \"1a17f81cf890444c8cf494b36bb215d9\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n        
    \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"e0cf236c7703460389083200a41fd6ec\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            
\"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"0a8295c3cadd45e2a93ac9b2741c07f1\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"f521455989c14fa681987a9cf5b750e6\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n       
   \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"02b06c57c93c4e1891801bd89a712f94\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": 
\"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"32277f46720b427291677a655535895a\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            
\"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"fa79c90a3e154e62b05b771cf6b1c1dd\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"d1b5d9ca7b66455690b60923e5846eb2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_9358e1494bad4f779a34df60681c8755\",\n              \"IPY_MODEL_54e9d511e8d649e4b75d92f9a9ecef25\",\n              \"IPY_MODEL_d4ebcc4829b3441d80bf0813fed8262c\"\n            ],\n            \"layout\": 
\"IPY_MODEL_0b9e6b44680040859cda9ad0592f1a8b\"\n          }\n        },\n        \"9358e1494bad4f779a34df60681c8755\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_04f673c3a73b4b7e875401c78805e400\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_8a58aa290dd84c0bbd725fb1456265c8\",\n            \"value\": \"Downloading (…)l-00033-of-00046.bin: 100%\"\n          }\n        },\n        \"54e9d511e8d649e4b75d92f9a9ecef25\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_15de375958db4c58bc4a57c02a04b995\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": 
\"IPY_MODEL_a21dd3e4bdb74d319696691ba96cb8b4\",\n            \"value\": 910328184\n          }\n        },\n        \"d4ebcc4829b3441d80bf0813fed8262c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_16490e85dfba4ef49e784388a5a9a2e4\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_594e5e9090e04f88ba535dff0b265b5d\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 178MB/s]\"\n          }\n        },\n        \"0b9e6b44680040859cda9ad0592f1a8b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            
\"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"04f673c3a73b4b7e875401c78805e400\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            
\"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"8a58aa290dd84c0bbd725fb1456265c8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"15de375958db4c58bc4a57c02a04b995\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n         
   \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"a21dd3e4bdb74d319696691ba96cb8b4\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"16490e85dfba4ef49e784388a5a9a2e4\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            
\"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"594e5e9090e04f88ba535dff0b265b5d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"42bbe9b70d384746969b749ba9f35765\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_de6f4a7cb67e42df99dcad5ded846501\",\n              \"IPY_MODEL_e892e00bba3549e09f450ae42eed7896\",\n              \"IPY_MODEL_9ebd056764584729899cf61e36782393\"\n            ],\n            \"layout\": \"IPY_MODEL_8c08a645c5ff49558146192982c25ae3\"\n          }\n        },\n        \"de6f4a7cb67e42df99dcad5ded846501\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_e7ca8628c6324408a1c2362c8b7c07ec\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_c3a7ad5e55d14594b0dc127f7eae2412\",\n            \"value\": \"Downloading (…)l-00034-of-00046.bin: 100%\"\n          }\n        },\n        \"e892e00bba3549e09f450ae42eed7896\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_7c8dd9e21fd643919e6085d99be6a742\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_00405508e43f4c4db6cd1be27d643648\",\n            \"value\": 910328184\n          }\n        },\n        \"9ebd056764584729899cf61e36782393\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            
\"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_6fa62c48d7c94129b9d1e416eeea75a4\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_6fd1c98b77c545fcac9847e82140ca72\",\n            \"value\": \" 910M/910M [00:04&lt;00:00, 293MB/s]\"\n          }\n        },\n        \"8c08a645c5ff49558146192982c25ae3\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            
\"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"e7ca8628c6324408a1c2362c8b7c07ec\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            
\"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"c3a7ad5e55d14594b0dc127f7eae2412\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"7c8dd9e21fd643919e6085d99be6a742\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": 
null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"00405508e43f4c4db6cd1be27d643648\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"6fa62c48d7c94129b9d1e416eeea75a4\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          
\"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"6fd1c98b77c545fcac9847e82140ca72\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n        
    \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"9d5ab6c4afb44c86bea8ab38b6514cfc\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_8326df60d2cc4c7a8e02781ed41b9e1e\",\n              \"IPY_MODEL_6767da18ce8d4d86b0e7b8fe96e393c2\",\n              \"IPY_MODEL_be3a2bc3a39441a4a74e7e99a83478cd\"\n            ],\n            \"layout\": \"IPY_MODEL_c776d1747d6b4510aa8b98ea67bcb58d\"\n          }\n        },\n        \"8326df60d2cc4c7a8e02781ed41b9e1e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n   
         \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_e8cf552c2f0345e8a3f7c87c3bedb52e\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_4735b5f44bf9479380c6589b86998bd3\",\n            \"value\": \"Downloading (…)l-00035-of-00046.bin: 100%\"\n          }\n        },\n        \"6767da18ce8d4d86b0e7b8fe96e393c2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_eeae561c0d104ffbb1409d710a3531e3\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_7d8faf8f3efe415bbac6e97014265ced\",\n            \"value\": 910328184\n          }\n        },\n        \"be3a2bc3a39441a4a74e7e99a83478cd\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            
\"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_2df77d291c494ee4b2d12f316a2ad7bf\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_3be17dbae47f49c2a3fc9cbf1ee125fc\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 260MB/s]\"\n          }\n        },\n        \"c776d1747d6b4510aa8b98ea67bcb58d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            
\"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"e8cf552c2f0345e8a3f7c87c3bedb52e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": 
null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"4735b5f44bf9479380c6589b86998bd3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"eeae561c0d104ffbb1409d710a3531e3\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            
\"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"7d8faf8f3efe415bbac6e97014265ced\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"2df77d291c494ee4b2d12f316a2ad7bf\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"3be17dbae47f49c2a3fc9cbf1ee125fc\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            
\"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"fad45c9d935b447689da4fda9b8fb10b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_8b63b5047f2740f88759be1a6ef4667b\",\n              \"IPY_MODEL_16032d320d5f4ee1b2283a5954bb4b01\",\n              \"IPY_MODEL_3f87b2f361eb4497ae9667b675de3608\"\n            ],\n            \"layout\": \"IPY_MODEL_26af4445311a4e8a9676070a80aa468f\"\n          }\n        },\n        \"8b63b5047f2740f88759be1a6ef4667b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_ba13004801c44455b421c79634d1b5a0\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_eb0245a2bb6b48bcaffc28095eb5a321\",\n            \"value\": \"Downloading 
(…)l-00036-of-00046.bin: 100%\"\n          }\n        },\n        \"16032d320d5f4ee1b2283a5954bb4b01\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_8286251efcdc4ffe905e8a6e73b618fd\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_08d35063dfba4f8e90053252d7d01d62\",\n            \"value\": 910328184\n          }\n        },\n        \"3f87b2f361eb4497ae9667b675de3608\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_8a42c0b87a6749bba063a793633dcade\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_289ea8304f7f4344aded557fc725bd0b\",\n            
\"value\": \" 910M/910M [00:03&lt;00:00, 232MB/s]\"\n          }\n        },\n        \"26af4445311a4e8a9676070a80aa468f\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n      
  \"ba13004801c44455b421c79634d1b5a0\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"eb0245a2bb6b48bcaffc28095eb5a321\": {\n          \"model_module\": 
\"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"8286251efcdc4ffe905e8a6e73b618fd\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            
\"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"08d35063dfba4f8e90053252d7d01d62\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"8a42c0b87a6749bba063a793633dcade\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": 
null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"289ea8304f7f4344aded557fc725bd0b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"f4fea9a4da374008996af29ca366daaf\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          
\"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_6d52448d893441b8bbda8420932a349e\",\n              \"IPY_MODEL_373bdacc2f424f358e0d8d1125d001cb\",\n              \"IPY_MODEL_64aeae3ef2cb4df69a7d7b28a49352f8\"\n            ],\n            \"layout\": \"IPY_MODEL_9fb436aa8a124a468cfeee45e677a284\"\n          }\n        },\n        \"6d52448d893441b8bbda8420932a349e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_c1b72c20a3c94ac7af6c51b975a92daf\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_5aa2db30846243aaa9aa097aa3069e18\",\n            \"value\": \"Downloading (…)l-00037-of-00046.bin: 100%\"\n          }\n        },\n        \"373bdacc2f424f358e0d8d1125d001cb\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_a769b7be45044bc29e352ffd4c8eb4c7\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_fd958bc26b4047beb5efcf60d4c4005d\",\n            \"value\": 910328184\n          }\n        },\n        \"64aeae3ef2cb4df69a7d7b28a49352f8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_bfed6ea906d84bba9eeaa9acd9478257\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_405b50a6908b429db31164eebe2c3185\",\n            \"value\": \" 910M/910M [00:12&lt;00:00, 184MB/s]\"\n          }\n        },\n        \"9fb436aa8a124a468cfeee45e677a284\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": 
\"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"c1b72c20a3c94ac7af6c51b975a92daf\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n     
       \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"5aa2db30846243aaa9aa097aa3069e18\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            
\"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"a769b7be45044bc29e352ffd4c8eb4c7\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            
\"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"fd958bc26b4047beb5efcf60d4c4005d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"bfed6ea906d84bba9eeaa9acd9478257\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            
\"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"405b50a6908b429db31164eebe2c3185\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"29ed63e571054a6da7f7e3c22d778162\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n    
        \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_6974cdb61fe64bc3b41bb896b9f2cf6e\",\n              \"IPY_MODEL_8de2ddb8f2a547719b47372dd3b1b401\",\n              \"IPY_MODEL_93197b59727441b48f30888a957856a7\"\n            ],\n            \"layout\": \"IPY_MODEL_0cafddc8dbe846e9ad1f7566ac3cb391\"\n          }\n        },\n        \"6974cdb61fe64bc3b41bb896b9f2cf6e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_fc127c01ec8f402ba250bfeadbad24af\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_9a1e6aa74018404ea3b8d9dbd6c68318\",\n            \"value\": \"Downloading (…)l-00038-of-00046.bin: 100%\"\n          }\n        },\n        \"8de2ddb8f2a547719b47372dd3b1b401\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n  
          \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_186866f1963945299d60c7fa9736a8d9\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_b53909d4c63c4f2da40a6f74f8720d38\",\n            \"value\": 910328184\n          }\n        },\n        \"93197b59727441b48f30888a957856a7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_b05467bdb6f745c69d15916d72cb454b\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_905e18faa37243359a3f477dc04297dd\",\n            \"value\": \" 910M/910M [00:06&lt;00:00, 244MB/s]\"\n          }\n        },\n        \"0cafddc8dbe846e9ad1f7566ac3cb391\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n        
    \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"fc127c01ec8f402ba250bfeadbad24af\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            
\"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"9a1e6aa74018404ea3b8d9dbd6c68318\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"186866f1963945299d60c7fa9736a8d9\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n       
   \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"b53909d4c63c4f2da40a6f74f8720d38\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": 
\"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"b05467bdb6f745c69d15916d72cb454b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            
\"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"905e18faa37243359a3f477dc04297dd\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"67cb5f74be6f42ed8c5757a979dc73c7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_f66ea56a983244f5b6e09c2f1e3b41f7\",\n              \"IPY_MODEL_92d21c6e1ec74f6bb7099bf0bed8fd6a\",\n              \"IPY_MODEL_d905b494f26f4b14bf310fa608e7b183\"\n            ],\n            \"layout\": 
\"IPY_MODEL_f6e0185d77824451a98cf74b5e130b9c\"\n          }\n        },\n        \"f66ea56a983244f5b6e09c2f1e3b41f7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_2c910835d7af4fea90519bd6c2462a05\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_1e188454444348bc81d917c7affb99bc\",\n            \"value\": \"Downloading (…)l-00039-of-00046.bin: 100%\"\n          }\n        },\n        \"92d21c6e1ec74f6bb7099bf0bed8fd6a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_dc9757d4e8274e268993eaa959f56093\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": 
\"IPY_MODEL_6667e6a6c8654ab2861bb3a3704aeb19\",\n            \"value\": 910328184\n          }\n        },\n        \"d905b494f26f4b14bf310fa608e7b183\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_2d007adbcfd644f3a730295d5837e489\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_c4d99ebbb7534c50b49cba41018b7092\",\n            \"value\": \" 910M/910M [00:04&lt;00:00, 269MB/s]\"\n          }\n        },\n        \"f6e0185d77824451a98cf74b5e130b9c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            
\"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"2c910835d7af4fea90519bd6c2462a05\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            
\"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"1e188454444348bc81d917c7affb99bc\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"dc9757d4e8274e268993eaa959f56093\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n         
   \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"6667e6a6c8654ab2861bb3a3704aeb19\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"2d007adbcfd644f3a730295d5837e489\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            
\"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"c4d99ebbb7534c50b49cba41018b7092\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"fe22e1525cbf43a2901eab928ec1e71b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_78d6e3f93f474285bab153571e758b1f\",\n              \"IPY_MODEL_7ba3f9e9e0284937bd83c50bc398b508\",\n              \"IPY_MODEL_c2999e626bac4080b7576ebad1ad7045\"\n            ],\n            \"layout\": \"IPY_MODEL_98622ba516af4606b4fe2a8a88b56ca1\"\n          }\n        },\n        \"78d6e3f93f474285bab153571e758b1f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_79871994a3a6416a9aa681c0dd0f98ae\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_0bb7e4c23db34b61871e58c820609338\",\n            \"value\": \"Downloading (…)l-00040-of-00046.bin: 100%\"\n          }\n        },\n        \"7ba3f9e9e0284937bd83c50bc398b508\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_a908479ac551494a9e13334ac6bfc0fd\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_5399a856774945c8983fb31c9d853add\",\n            \"value\": 910328184\n          }\n        },\n        \"c2999e626bac4080b7576ebad1ad7045\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            
\"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_0dec4c617e1b45f99ed431296b6803e3\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_7db935abf7094f559660d4d8a6858e89\",\n            \"value\": \" 910M/910M [00:03&lt;00:00, 242MB/s]\"\n          }\n        },\n        \"98622ba516af4606b4fe2a8a88b56ca1\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            
\"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"79871994a3a6416a9aa681c0dd0f98ae\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            
\"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"0bb7e4c23db34b61871e58c820609338\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"a908479ac551494a9e13334ac6bfc0fd\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": 
null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"5399a856774945c8983fb31c9d853add\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"0dec4c617e1b45f99ed431296b6803e3\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          
\"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"7db935abf7094f559660d4d8a6858e89\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n        
    \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"fc4fd46da5a24613ba52d4b5344ac3ec\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_e30331900ae44f0fa0aad105ac467d27\",\n              \"IPY_MODEL_63f0b67181414e31b761d3bc58f2c2e4\",\n              \"IPY_MODEL_0b35ba774af84c13b00b06ca03cb145f\"\n            ],\n            \"layout\": \"IPY_MODEL_193d4f7dfaa841c48854648a494321bf\"\n          }\n        },\n        \"e30331900ae44f0fa0aad105ac467d27\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n   
         \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_3340576d3b57404599fcc08deebac3b8\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_4e1516fd31df4892a8a29d7e359866a2\",\n            \"value\": \"Downloading (…)l-00041-of-00046.bin: 100%\"\n          }\n        },\n        \"63f0b67181414e31b761d3bc58f2c2e4\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_d2bc4fa779784e95b5d18a2f88272996\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_fb14afd7402e4af5ab1a66239b5f50cc\",\n            \"value\": 910328184\n          }\n        },\n        \"0b35ba774af84c13b00b06ca03cb145f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            
\"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_813b1bfdac4842828fc929b884d6348b\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_7612dbc295f64405b74b907230e727b3\",\n            \"value\": \" 910M/910M [00:10&lt;00:00, 237MB/s]\"\n          }\n        },\n        \"193d4f7dfaa841c48854648a494321bf\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            
\"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"3340576d3b57404599fcc08deebac3b8\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": 
null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"4e1516fd31df4892a8a29d7e359866a2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"d2bc4fa779784e95b5d18a2f88272996\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            
\"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"fb14afd7402e4af5ab1a66239b5f50cc\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"813b1bfdac4842828fc929b884d6348b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"7612dbc295f64405b74b907230e727b3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            
\"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"f272074531e0421391f1a1a214d912e7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_ca92f2b98b3646f294cec546eaceac90\",\n              \"IPY_MODEL_45b0889f87ad4e689c6cbdb1790a6aba\",\n              \"IPY_MODEL_f68a9c9a98e44336a8bb5557a7c18444\"\n            ],\n            \"layout\": \"IPY_MODEL_a3b1e0fadb794e46b581890a346eeec6\"\n          }\n        },\n        \"ca92f2b98b3646f294cec546eaceac90\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_6dd791d0e0a64f8f8b69e83434088c7b\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_d4c33048bb25493fbc8c04331812dd0a\",\n            \"value\": \"Downloading 
(…)l-00042-of-00046.bin: 100%\"\n          }\n        },\n        \"45b0889f87ad4e689c6cbdb1790a6aba\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_3645e270ed33453aa0f3e7e563bac6a1\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_ca7e00c2965449d7a2acbf320736c3ae\",\n            \"value\": 910328184\n          }\n        },\n        \"f68a9c9a98e44336a8bb5557a7c18444\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_d4248737387f4344b157e955a6383d82\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_7e870beb473c4a3ea9fcf4f2c1495d09\",\n            
\"value\": \" 910M/910M [00:09&lt;00:00, 280MB/s]\"\n          }\n        },\n        \"a3b1e0fadb794e46b581890a346eeec6\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n      
  \"6dd791d0e0a64f8f8b69e83434088c7b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"d4c33048bb25493fbc8c04331812dd0a\": {\n          \"model_module\": 
\"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"3645e270ed33453aa0f3e7e563bac6a1\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            
\"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"ca7e00c2965449d7a2acbf320736c3ae\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"d4248737387f4344b157e955a6383d82\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": 
null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"7e870beb473c4a3ea9fcf4f2c1495d09\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"eadd0ad10ef14199b4eabcfc74f84efc\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          
\"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_1af186c96a144b18af38ee964121a184\",\n              \"IPY_MODEL_68cea25fe0e74326a4d74a16fff5aa41\",\n              \"IPY_MODEL_961d0f2d17324effa4289951a481062d\"\n            ],\n            \"layout\": \"IPY_MODEL_abb70d1655c04f63a4f913df934c6566\"\n          }\n        },\n        \"1af186c96a144b18af38ee964121a184\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_61e7833fb298417fb3453af2813a99f9\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_61496bd04431439ea0bfe37af1803916\",\n            \"value\": \"Downloading (…)l-00043-of-00046.bin: 100%\"\n          }\n        },\n        \"68cea25fe0e74326a4d74a16fff5aa41\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_924ac6ca25864079af5e07234fb074aa\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_7261d8a4fdce4034a778989e9801a1f3\",\n            \"value\": 910328184\n          }\n        },\n        \"961d0f2d17324effa4289951a481062d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_fd3c12a07a274ede8098c306685949fe\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_880fd220af564e67b1251283902157e5\",\n            \"value\": \" 910M/910M [00:05&lt;00:00, 299MB/s]\"\n          }\n        },\n        \"abb70d1655c04f63a4f913df934c6566\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": 
\"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"61e7833fb298417fb3453af2813a99f9\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n     
       \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"61496bd04431439ea0bfe37af1803916\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            
\"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"924ac6ca25864079af5e07234fb074aa\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            
\"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"7261d8a4fdce4034a778989e9801a1f3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"fd3c12a07a274ede8098c306685949fe\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            
\"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"880fd220af564e67b1251283902157e5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"7c1dcef2827749b2aa270ca386b33756\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n    
        \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_f5b83a9a7ead48829887a53b950870a6\",\n              \"IPY_MODEL_03fc8ba5edc842b58e94ca9de9c926d4\",\n              \"IPY_MODEL_fe79a709c85841b98e5ee72ff91990ef\"\n            ],\n            \"layout\": \"IPY_MODEL_1cb3356f90394ca5b69edccac816827e\"\n          }\n        },\n        \"f5b83a9a7ead48829887a53b950870a6\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_a718f32db99f4e809e1c51c58eef8268\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_254889affd2e4f139720740e709a289b\",\n            \"value\": \"Downloading (…)l-00044-of-00046.bin: 100%\"\n          }\n        },\n        \"03fc8ba5edc842b58e94ca9de9c926d4\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n  
          \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_e1ffb78ae02f4ddeb2b37dd7f31895e8\",\n            \"max\": 910328184,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_ab3dd37580874852b0800d0c87793bfd\",\n            \"value\": 910328184\n          }\n        },\n        \"fe79a709c85841b98e5ee72ff91990ef\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_882daeceef624f3bbf52d2c8cbc021eb\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_faa3b04c548e44f08a90f845086cba54\",\n            \"value\": \" 910M/910M [00:07&lt;00:00, 35.7MB/s]\"\n          }\n        },\n        \"1cb3356f90394ca5b69edccac816827e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n       
     \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"a718f32db99f4e809e1c51c58eef8268\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            
\"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"254889affd2e4f139720740e709a289b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"e1ffb78ae02f4ddeb2b37dd7f31895e8\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n       
   \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"ab3dd37580874852b0800d0c87793bfd\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": 
\"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"882daeceef624f3bbf52d2c8cbc021eb\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            
\"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"faa3b04c548e44f08a90f845086cba54\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"6cca6a67d7bc4b5bac7e6b8ae120659f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_9c6f544bb6da4252a59965df8e62075d\",\n              \"IPY_MODEL_712bb50c8a154532a2a99a569c02ed01\",\n              \"IPY_MODEL_79ecf447e45f4056bc8b11a96d1735a2\"\n            ],\n            \"layout\": 
\"IPY_MODEL_e173c5176713482c921f7761d7781fd5\"\n          }\n        },\n        \"9c6f544bb6da4252a59965df8e62075d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_d3a78bb198174512b362b18c4464e8a7\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_5196839e802d44bab597bcdcd11cf796\",\n            \"value\": \"Downloading (…)l-00045-of-00046.bin: 100%\"\n          }\n        },\n        \"712bb50c8a154532a2a99a569c02ed01\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_baff9ad8a46546fe99245d1e7069518b\",\n            \"max\": 604067735,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": 
\"IPY_MODEL_f8c35ac29d2f44bea6e5b2fc077afe90\",\n            \"value\": 604067735\n          }\n        },\n        \"79ecf447e45f4056bc8b11a96d1735a2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_e41f5054b2f5427181000dc84b0092de\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_1e06550a06f14dce998278f321a9eec3\",\n            \"value\": \" 604M/604M [00:02&lt;00:00, 283MB/s]\"\n          }\n        },\n        \"e173c5176713482c921f7761d7781fd5\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            
\"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"d3a78bb198174512b362b18c4464e8a7\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            
\"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"5196839e802d44bab597bcdcd11cf796\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"baff9ad8a46546fe99245d1e7069518b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n         
   \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"f8c35ac29d2f44bea6e5b2fc077afe90\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"e41f5054b2f5427181000dc84b0092de\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            
\"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"1e06550a06f14dce998278f321a9eec3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"b6c0c923b50c4d10a7ca77717c2e6629\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_ea4263560a734d388c028f0b3fdec4a1\",\n              \"IPY_MODEL_9e361f6a71e34b2193da9a80886c7622\",\n              \"IPY_MODEL_725b833e25a84c569779d6917dc840d6\"\n            ],\n            \"layout\": \"IPY_MODEL_d74e3baaaf164a5babcbe6590954be6f\"\n          }\n        },\n        \"ea4263560a734d388c028f0b3fdec4a1\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n     
       \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_672b9c36a7a6489eb6cc7788e604ec7c\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_e0f0899196654747b9cfdd79bdb2d125\",\n            \"value\": \"Downloading (…)l-00046-of-00046.bin: 100%\"\n          }\n        },\n        \"9e361f6a71e34b2193da9a80886c7622\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_d9aee99605884feea4739c62cf066029\",\n            \"max\": 619709163,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_d1e85534930f4eeda07099642b07f816\",\n            \"value\": 619709163\n          }\n        },\n        \"725b833e25a84c569779d6917dc840d6\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            
\"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_3b9399e715e34c6eadcc203fda73a886\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_9b56dad3aab74dfb90245fa5a4e0215d\",\n            \"value\": \" 620M/620M [00:03&lt;00:00, 75.9MB/s]\"\n          }\n        },\n        \"d74e3baaaf164a5babcbe6590954be6f\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            
\"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"672b9c36a7a6489eb6cc7788e604ec7c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            
\"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"e0f0899196654747b9cfdd79bdb2d125\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"d9aee99605884feea4739c62cf066029\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": 
null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"d1e85534930f4eeda07099642b07f816\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"3b9399e715e34c6eadcc203fda73a886\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          
\"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"9b56dad3aab74dfb90245fa5a4e0215d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n        
    \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"fef6669cd33a4ad7b15704f6fdeacf22\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_706cd5f292a442729607c3689cde97ff\",\n              \"IPY_MODEL_d63cb79240b44890924db96953e29351\",\n              \"IPY_MODEL_f48581b1364648798ded66e5300d22e5\"\n            ],\n            \"layout\": \"IPY_MODEL_171ff31740b74177b198115497b8b42d\"\n          }\n        },\n        \"706cd5f292a442729607c3689cde97ff\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n   
         \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_07eb591d0c4d4dc8a3a6f9452b7294ad\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_6cf056fa03c4450d8804916ad08aa64b\",\n            \"value\": \"Loading checkpoint shards: 100%\"\n          }\n        },\n        \"d63cb79240b44890924db96953e29351\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_8c2290c4a29c4aa78ade41dd6cbec5e6\",\n            \"max\": 46,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_9ff0bdcf66804998836e0ece1e2978e5\",\n            \"value\": 46\n          }\n        },\n        \"f48581b1364648798ded66e5300d22e5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n   
         \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_c2b4e4345bd94f6780718e26cfeef0c8\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_9a692c5494ab444eacf25307ad0b44a6\",\n            \"value\": \" 46/46 [04:49&lt;00:00,  5.25s/it]\"\n          }\n        },\n        \"171ff31740b74177b198115497b8b42d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            
\"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"07eb591d0c4d4dc8a3a6f9452b7294ad\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": 
null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"6cf056fa03c4450d8804916ad08aa64b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"8c2290c4a29c4aa78ade41dd6cbec5e6\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            
\"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"9ff0bdcf66804998836e0ece1e2978e5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"c2b4e4345bd94f6780718e26cfeef0c8\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            
\"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"9a692c5494ab444eacf25307ad0b44a6\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            
\"description_width\": \"\"\n          }\n        },\n        \"07882da20e7740bb9d2c8a3704d00c92\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_3ecf029b84ea46a9ac7b85aa0d2e07d2\",\n              \"IPY_MODEL_80c43d494b74485d86f5a85b1a3c0101\",\n              \"IPY_MODEL_e51a711db6ae462589fbde4f8c4d1032\"\n            ],\n            \"layout\": \"IPY_MODEL_62b4817a071c4d38a446cf2836407838\"\n          }\n        },\n        \"3ecf029b84ea46a9ac7b85aa0d2e07d2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_6f8a29d2f4034cffa5201853f3ba3726\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_be0e55802e604a9681bccba4db044103\",\n            \"value\": \"Downloading readme: 100%\"\n          }\n        },\n        
\"80c43d494b74485d86f5a85b1a3c0101\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_25ccac5c9da94eabae3d517e04f5ac43\",\n            \"max\": 5554,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_7cec16b3591d477b8d395944e8a70ebf\",\n            \"value\": 5554\n          }\n        },\n        \"e51a711db6ae462589fbde4f8c4d1032\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_19df92341e3d413b84826f7cc6931061\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_b2d8b323c0984353a697716ed7a6d127\",\n            \"value\": \" 5.55k/5.55k [00:00&lt;00:00, 330kB/s]\"\n          }\n        
},\n        \"62b4817a071c4d38a446cf2836407838\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"6f8a29d2f4034cffa5201853f3ba3726\": {\n          \"model_module\": 
\"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"be0e55802e604a9681bccba4db044103\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": 
\"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"25ccac5c9da94eabae3d517e04f5ac43\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            
\"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"7cec16b3591d477b8d395944e8a70ebf\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"19df92341e3d413b84826f7cc6931061\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n 
           \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"b2d8b323c0984353a697716ed7a6d127\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"7e6769a945d649ec82ef7ed2c2e3357f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": 
\"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_9db11bf2430f4493b01ce9e814ddb4a7\",\n              \"IPY_MODEL_cd250601aece4c1cb48a639b0f25d118\",\n              \"IPY_MODEL_a2842acc3b8143b9b11b279052097f3a\"\n            ],\n            \"layout\": \"IPY_MODEL_8506ac0bb0204961afc7453d0c077680\"\n          }\n        },\n        \"9db11bf2430f4493b01ce9e814ddb4a7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_5449f345b9584403b92ccaadcc009593\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_9c084c63a4e1476986a1389cb2a43eeb\",\n            \"value\": \"Downloading data files: 100%\"\n          }\n        },\n        \"cd250601aece4c1cb48a639b0f25d118\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            
\"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_d0167506afcc4602bd80a288bf191127\",\n            \"max\": 1,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_5ca20d88ea7a4b4dbe6fef01928ac183\",\n            \"value\": 1\n          }\n        },\n        \"a2842acc3b8143b9b11b279052097f3a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_1a24ab331b7a43fb8a8075c7f15b6541\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_59610889b58644979dcfb1440550d0df\",\n            \"value\": \" 1/1 [00:00&lt;00:00,  2.65it/s]\"\n          }\n        },\n        \"8506ac0bb0204961afc7453d0c077680\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            
\"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"5449f345b9584403b92ccaadcc009593\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            
\"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"9c084c63a4e1476986a1389cb2a43eeb\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n          
  \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"d0167506afcc4602bd80a288bf191127\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            
\"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"5ca20d88ea7a4b4dbe6fef01928ac183\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"1a24ab331b7a43fb8a8075c7f15b6541\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            
\"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"59610889b58644979dcfb1440550d0df\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"73e1afe0180149f68988402ce03412c3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            
\"children\": [\n              \"IPY_MODEL_514d45ef359c44498c85dcfb58682111\",\n              \"IPY_MODEL_03077ea71a23485aabe52788f8ebab78\",\n              \"IPY_MODEL_0714173ebbb14d0d83782e0f0412567b\"\n            ],\n            \"layout\": \"IPY_MODEL_c2d371e15b374beda61ebc435981cd27\"\n          }\n        },\n        \"514d45ef359c44498c85dcfb58682111\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_bf709f8bfc744e448d7367c8f43f1853\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_42e3d0c1fdb946469114ba0388e6b6ff\",\n            \"value\": \"Downloading data: 100%\"\n          }\n        },\n        \"03077ea71a23485aabe52788f8ebab78\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            
\"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_fbaadca189b448a3b17449b74136ffd9\",\n            \"max\": 646739,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_421c682331134cf0b24b361b0245e15c\",\n            \"value\": 646739\n          }\n        },\n        \"0714173ebbb14d0d83782e0f0412567b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_e9f39163fa344342a7c659a3cb782e46\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_fcaceddd0d8c4847ad8ce2490d126f6d\",\n            \"value\": \" 647k/647k [00:00&lt;00:00, 13.4MB/s]\"\n          }\n        },\n        \"c2d371e15b374beda61ebc435981cd27\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": 
null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"bf709f8bfc744e448d7367c8f43f1853\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            
\"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"42e3d0c1fdb946469114ba0388e6b6ff\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"fbaadca189b448a3b17449b74136ffd9\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n         
 \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"421c682331134cf0b24b361b0245e15c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": 
\"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"e9f39163fa344342a7c659a3cb782e46\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n           
 \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"fcaceddd0d8c4847ad8ce2490d126f6d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"cf1bbb51baeb41918c4e35528337807f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_80d90b93cc72422c908d1cd3dbf5c8f5\",\n              \"IPY_MODEL_6182639b2982481d93ef75b50dab082a\",\n              \"IPY_MODEL_baa95e82bca9445ebce3cf22280b950a\"\n            ],\n            \"layout\": \"IPY_MODEL_85b47afe6fbc4710b71f187781e08ec7\"\n          }\n        },\n        
\"80d90b93cc72422c908d1cd3dbf5c8f5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_cade5b7ee3304ecdafdd17028e19c7cd\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_e790c00fe7a647c3973259d698c26c05\",\n            \"value\": \"Extracting data files: 100%\"\n          }\n        },\n        \"6182639b2982481d93ef75b50dab082a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_18c32d78b8cf4cedbb55b68073aef898\",\n            \"max\": 1,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_72a104f448284a14bb287afeced5bda9\",\n            \"value\": 1\n          }\n        },\n        
\"baa95e82bca9445ebce3cf22280b950a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_1228a27445f8423580bf6a717f6629a6\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_9bde4e4848e64c4c94ab27f7f5cec308\",\n            \"value\": \" 1/1 [00:00&lt;00:00, 30.89it/s]\"\n          }\n        },\n        \"85b47afe6fbc4710b71f187781e08ec7\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": 
null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"cade5b7ee3304ecdafdd17028e19c7cd\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": 
null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"e790c00fe7a647c3973259d698c26c05\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"18c32d78b8cf4cedbb55b68073aef898\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": 
\"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"72a104f448284a14bb287afeced5bda9\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            
\"description_width\": \"\"\n          }\n        },\n        \"1228a27445f8423580bf6a717f6629a6\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        
\"9bde4e4848e64c4c94ab27f7f5cec308\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"8ebb69d66f354fc48694784a8f36ec63\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_8647a6d17b194650b404918e220a7ee0\",\n              \"IPY_MODEL_379d5e2345e34c5889e531f0fc455741\",\n              \"IPY_MODEL_92c4a7037df4432788661e13d5e668ec\"\n            ],\n            \"layout\": \"IPY_MODEL_bcac72f7ce134638b39f033c0f568fb3\"\n          }\n        },\n        \"8647a6d17b194650b404918e220a7ee0\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n    
        \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_18374440528a41ef9a8f11367320c171\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_80634feb58104ccb85ce4696c6a69e31\",\n            \"value\": \"Generating train split: \"\n          }\n        },\n        \"379d5e2345e34c5889e531f0fc455741\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"info\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_4723270e31f44e42b71c093eb9604258\",\n            \"max\": 1,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_dd195f8647524e04bc9480f2fa85f5ce\",\n            \"value\": 1\n          }\n        },\n        \"92c4a7037df4432788661e13d5e668ec\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            
\"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_2096ac97506f4fc2aa2a70bee61aea72\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_a7c6c15bc3ef471fb0d4e28adadbe41d\",\n            \"value\": \" 0/0 [00:00&lt;?, ? examples/s]\"\n          }\n        },\n        \"bcac72f7ce134638b39f033c0f568fb3\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n   
         \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": \"hidden\",\n            \"width\": null\n          }\n        },\n        \"18374440528a41ef9a8f11367320c171\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            
\"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"80634feb58104ccb85ce4696c6a69e31\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"4723270e31f44e42b71c093eb9604258\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            
\"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": \"20px\"\n          }\n        },\n        \"dd195f8647524e04bc9480f2fa85f5ce\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"2096ac97506f4fc2aa2a70bee61aea72\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            
\"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"a7c6c15bc3ef471fb0d4e28adadbe41d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            
\"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"b2b6722f496a49918740dfc9a97789ab\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_ef5207843c4a4aa8b1e366a5d0ae7536\",\n              \"IPY_MODEL_4344f1b57dd5424e9e5c046aa42cfe35\",\n              \"IPY_MODEL_76f31d54e20b45269e2128340384374e\"\n            ],\n            \"layout\": \"IPY_MODEL_18732888c00f442bae580b94438e1916\"\n          }\n        },\n        \"ef5207843c4a4aa8b1e366a5d0ae7536\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": 
\"IPY_MODEL_f0d84153556e4bdbbb73c1dd80974d24\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_661c5bf64cff4f1893865982c68ca71c\",\n            \"value\": \"100%\"\n          }\n        },\n        \"4344f1b57dd5424e9e5c046aa42cfe35\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"success\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_4184603a7e74411a9bec05d6b5939694\",\n            \"max\": 1,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_477d9f12c1964aeba9258b7990f51aa8\",\n            \"value\": 1\n          }\n        },\n        \"76f31d54e20b45269e2128340384374e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": 
\"IPY_MODEL_63cb22480eae40c7a91796718fe78fb3\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_90feb97f2c20432eb2f54485ea8d3087\",\n            \"value\": \" 1/1 [00:00&lt;00:00, 29.84it/s]\"\n          }\n        },\n        \"18732888c00f442bae580b94438e1916\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            
\"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"f0d84153556e4bdbbb73c1dd80974d24\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n      
      \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"661c5bf64cff4f1893865982c68ca71c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"4184603a7e74411a9bec05d6b5939694\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n    
        \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"477d9f12c1964aeba9258b7990f51aa8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"63cb22480eae40c7a91796718fe78fb3\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            
\"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"90feb97f2c20432eb2f54485ea8d3087\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"6ec3c77d01994f0a97bcd83c542ea8a9\": {\n          \"model_module\": 
\"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HBoxModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HBoxView\",\n            \"box_style\": \"\",\n            \"children\": [\n              \"IPY_MODEL_2c8f8046c269429a894a1910cfdd5921\",\n              \"IPY_MODEL_55608d21c20641a5a9798e721c84290a\",\n              \"IPY_MODEL_0c14280af7d04e23bc0bd0a83b4af90e\"\n            ],\n            \"layout\": \"IPY_MODEL_b767cbf30ee6433ba8c006ded500df2b\"\n          }\n        },\n        \"2c8f8046c269429a894a1910cfdd5921\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_6a40b6ae495c4ef882611b4a066a5ae2\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_5924fee94134449ea78570bce67d3aef\",\n            \"value\": \"Map:  80%\"\n          }\n        },\n        \"55608d21c20641a5a9798e721c84290a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          
\"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"ProgressView\",\n            \"bar_style\": \"\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_8af4e90d1af944c98e6d95f75fbe6c11\",\n            \"max\": 2508,\n            \"min\": 0,\n            \"orientation\": \"horizontal\",\n            \"style\": \"IPY_MODEL_22b3781abd96465c8ae8740432a95732\",\n            \"value\": 2508\n          }\n        },\n        \"0c14280af7d04e23bc0bd0a83b4af90e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_dom_classes\": [],\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"HTMLModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_view_module_version\": \"1.5.0\",\n            \"_view_name\": \"HTMLView\",\n            \"description\": \"\",\n            \"description_tooltip\": null,\n            \"layout\": \"IPY_MODEL_ecfb53b9fa4b43eab3c815ee9a90d3c7\",\n            \"placeholder\": \"​\",\n            \"style\": \"IPY_MODEL_144b47000c76414fb7051640e6d0bb33\",\n            \"value\": \" 2000/2508 [00:00&lt;00:00, 5704.22 examples/s]\"\n          }\n        },\n        \"b767cbf30ee6433ba8c006ded500df2b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n        
  \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": \"hidden\",\n            \"width\": null\n          }\n        },\n        \"6a40b6ae495c4ef882611b4a066a5ae2\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            
\"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"5924fee94134449ea78570bce67d3aef\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            
\"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        },\n        \"8af4e90d1af944c98e6d95f75fbe6c11\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            \"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            
\"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"22b3781abd96465c8ae8740432a95732\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"bar_color\": null,\n            \"description_width\": \"\"\n          }\n        },\n        \"ecfb53b9fa4b43eab3c815ee9a90d3c7\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"model_module_version\": \"1.2.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.2.0\",\n            \"_model_name\": \"LayoutModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"LayoutView\",\n            \"align_content\": null,\n            \"align_items\": null,\n            \"align_self\": null,\n            \"border\": null,\n            \"bottom\": null,\n            \"display\": null,\n            \"flex\": null,\n            \"flex_flow\": null,\n            \"grid_area\": null,\n            \"grid_auto_columns\": null,\n            \"grid_auto_flow\": null,\n            \"grid_auto_rows\": null,\n            \"grid_column\": null,\n            
\"grid_gap\": null,\n            \"grid_row\": null,\n            \"grid_template_areas\": null,\n            \"grid_template_columns\": null,\n            \"grid_template_rows\": null,\n            \"height\": null,\n            \"justify_content\": null,\n            \"justify_items\": null,\n            \"left\": null,\n            \"margin\": null,\n            \"max_height\": null,\n            \"max_width\": null,\n            \"min_height\": null,\n            \"min_width\": null,\n            \"object_fit\": null,\n            \"object_position\": null,\n            \"order\": null,\n            \"overflow\": null,\n            \"overflow_x\": null,\n            \"overflow_y\": null,\n            \"padding\": null,\n            \"right\": null,\n            \"top\": null,\n            \"visibility\": null,\n            \"width\": null\n          }\n        },\n        \"144b47000c76414fb7051640e6d0bb33\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"model_module_version\": \"1.5.0\",\n          \"state\": {\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"_view_count\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_view_module_version\": \"1.2.0\",\n            \"_view_name\": \"StyleView\",\n            \"description_width\": \"\"\n          }\n        }\n      }\n    }\n  },\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"view-in-github\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"<a href=\\\"https://colab.research.google.com/github/santhoshkolloju/Abstractive-Summarization-With-Transfer-Learning/blob/master/bnb_4bit_training.ipynb\\\" target=\\\"_parent\\\"><img 
src=\\\"https://colab.research.google.com/assets/colab-badge.svg\\\" alt=\\\"Open In Colab\\\"/></a>\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"# `transformers` meets `bitsandbytes` for democratzing Large Language Models (LLMs) through 4bit quantization\\n\",\n        \"\\n\",\n        \"<center>\\n\",\n        \"<img src=\\\"https://github.com/huggingface/blog/blob/main/assets/96_hf_bitsandbytes_integration/Thumbnail_blue.png?raw=true\\\" alt=\\\"drawing\\\" width=\\\"700\\\" class=\\\"center\\\"/>\\n\",\n        \"</center>\\n\",\n        \"\\n\",\n        \"Welcome to this notebook that goes through the recent `bitsandbytes` integration that includes the work from XXX that introduces no performance degradation 4bit quantization techniques, for democratizing LLMs inference and training.\\n\",\n        \"\\n\",\n        \"In this notebook, we will learn together how to load a large model in 4bit (`gpt-neo-x-20b`) and train it using Google Colab and PEFT library from Hugging Face 🤗.\\n\",\n        \"\\n\",\n        \"[In the general usage notebook](https://colab.research.google.com/drive/1ge2F1QSK8Q7h0hn3YKuBCOAS0bK8E0wf?usp=sharing), you can learn how to propely load a model in 4bit with all its variants. 
\\n\",\n        \"\\n\",\n        \"If you liked the previous work for integrating [*LLM.int8*](https://arxiv.org/abs/2208.07339), you can have a look at the [introduction blogpost](https://huggingface.co/blog/hf-bitsandbytes-integration) to lean more about that quantization method.\\n\"\n      ],\n      \"metadata\": {\n        \"id\": \"XIyP_0r6zuVc\"\n      }\n    },\n    {\n      \"cell_type\": \"code\",\n      \"execution_count\": null,\n      \"metadata\": {\n        \"id\": \"FuXIFTFapAMI\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\"\n        },\n        \"outputId\": \"735d5db2-a75e-4d7d-fa09-666269548ef3\"\n      },\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"name\": \"stdout\",\n          \"text\": [\n            \"\\u001b[2K     \\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\\u001b[0m \\u001b[32m92.2/92.2 MB\\u001b[0m \\u001b[31m9.0 MB/s\\u001b[0m eta \\u001b[36m0:00:00\\u001b[0m\\n\",\n            \"\\u001b[?25h  Installing build dependencies ... \\u001b[?25l\\u001b[?25hdone\\n\",\n            \"  Getting requirements to build wheel ... \\u001b[?25l\\u001b[?25hdone\\n\",\n            \"  Preparing metadata (pyproject.toml) ... \\u001b[?25l\\u001b[?25hdone\\n\",\n            \"\\u001b[2K     \\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\\u001b[0m \\u001b[32m224.5/224.5 kB\\u001b[0m \\u001b[31m9.7 MB/s\\u001b[0m eta \\u001b[36m0:00:00\\u001b[0m\\n\",\n            \"\\u001b[2K     \\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\\u001b[0m \\u001b[32m7.8/7.8 MB\\u001b[0m \\u001b[31m99.6 MB/s\\u001b[0m eta \\u001b[36m0:00:00\\u001b[0m\\n\",\n            \"\\u001b[?25h  Building wheel for transformers (pyproject.toml) ... \\u001b[?25l\\u001b[?25hdone\\n\",\n            \"  Installing build dependencies ... \\u001b[?25l\\u001b[?25hdone\\n\",\n            \"  Getting requirements to build wheel ... 
\\u001b[?25l\\u001b[?25hdone\\n\",\n            \"  Preparing metadata (pyproject.toml) ... \\u001b[?25l\\u001b[?25hdone\\n\",\n            \"\\u001b[2K     \\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\\u001b[0m \\u001b[32m219.1/219.1 kB\\u001b[0m \\u001b[31m8.9 MB/s\\u001b[0m eta \\u001b[36m0:00:00\\u001b[0m\\n\",\n            \"\\u001b[?25h  Building wheel for peft (pyproject.toml) ... \\u001b[?25l\\u001b[?25hdone\\n\",\n            \"  Installing build dependencies ... \\u001b[?25l\\u001b[?25hdone\\n\",\n            \"  Getting requirements to build wheel ... \\u001b[?25l\\u001b[?25hdone\\n\",\n            \"  Preparing metadata (pyproject.toml) ... \\u001b[?25l\\u001b[?25hdone\\n\",\n            \"  Building wheel for accelerate (pyproject.toml) ... \\u001b[?25l\\u001b[?25hdone\\n\",\n            \"\\u001b[2K     \\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\\u001b[0m \\u001b[32m474.6/474.6 kB\\u001b[0m \\u001b[31m14.3 MB/s\\u001b[0m eta \\u001b[36m0:00:00\\u001b[0m\\n\",\n            \"\\u001b[2K     \\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\\u001b[0m \\u001b[32m110.5/110.5 kB\\u001b[0m \\u001b[31m14.4 MB/s\\u001b[0m eta \\u001b[36m0:00:00\\u001b[0m\\n\",\n            \"\\u001b[2K     \\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\\u001b[0m \\u001b[32m212.5/212.5 kB\\u001b[0m \\u001b[31m23.2 MB/s\\u001b[0m eta \\u001b[36m0:00:00\\u001b[0m\\n\",\n            \"\\u001b[2K     \\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\\u001b[0m \\u001b[32m134.3/134.3 kB\\u001b[0m \\u001b[31m17.1 MB/s\\u001b[0m eta \\u001b[36m0:00:00\\u001b[0m\\n\",\n            \"\\u001b[2K     \\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\\u001b[0m \\u001b[32m1.0/1.0 MB\\u001b[0m \\u001b[31m45.9 MB/s\\u001b[0m eta \\u001b[36m0:00:00\\u001b[0m\\n\",\n            \"\\u001b[2K     \\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\\u001b[0m \\u001b[32m114.5/114.5 kB\\u001b[0m \\u001b[31m12.2 MB/s\\u001b[0m eta \\u001b[36m0:00:00\\u001b[0m\\n\",\n            
\"\\u001b[2K     \\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\\u001b[0m \\u001b[32m268.8/268.8 kB\\u001b[0m \\u001b[31m27.3 MB/s\\u001b[0m eta \\u001b[36m0:00:00\\u001b[0m\\n\",\n            \"\\u001b[2K     \\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\\u001b[0m \\u001b[32m149.6/149.6 kB\\u001b[0m \\u001b[31m18.6 MB/s\\u001b[0m eta \\u001b[36m0:00:00\\u001b[0m\\n\",\n            \"\\u001b[?25h\"\n          ]\n        }\n      ],\n      \"source\": [\n        \"!pip install -q -U bitsandbytes\\n\",\n        \"!pip install -q -U git+https://github.com/huggingface/transformers.git \\n\",\n        \"!pip install -q -U git+https://github.com/huggingface/peft.git\\n\",\n        \"!pip install -q -U git+https://github.com/huggingface/accelerate.git\\n\",\n        \"!pip install -q datasets\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"First let's load the model we are going to use - GPT-neo-x-20B! Note that the model itself is around 40GB in half precision\"\n      ],\n      \"metadata\": {\n        \"id\": \"MJ-5idQwzvg-\"\n      }\n    },\n    {\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"import torch\\n\",\n        \"from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\\n\",\n        \"\\n\",\n        \"model_id = \\\"EleutherAI/gpt-neox-20b\\\"\\n\",\n        \"bnb_config = BitsAndBytesConfig(\\n\",\n        \"    load_in_4bit=True,\\n\",\n        \"    bnb_4bit_use_double_quant=True,\\n\",\n        \"    bnb_4bit_quant_type=\\\"nf4\\\",\\n\",\n        \"    bnb_4bit_compute_dtype=torch.bfloat16\\n\",\n        \")\\n\",\n        \"\\n\",\n        \"tokenizer = AutoTokenizer.from_pretrained(model_id)\\n\",\n        \"model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={\\\"\\\":0})\"\n      ],\n      \"metadata\": {\n        \"id\": \"E0Nl5mWL0k2T\",\n        \"colab\": {\n          \"base_uri\": 
\"https://localhost:8080/\",\n          \"height\": 1000,\n          \"referenced_widgets\": [\n            \"4dda8029a1c54f9dac38834fc49d12b7\",\n            \"e0e252da64054d6eb2f661b985aceab8\",\n            \"43a0c376ab8d40619c84e50c1fae1bf3\",\n            \"e7dc091b7bd54c439aaea379a15bdb6a\",\n            \"2fff8a5907fa44248e3b57dc4051236c\",\n            \"73f8063f8b4c4c809ce4e410ccafee96\",\n            \"11566fa4205343cb9378f078b5b18f98\",\n            \"18f17e571bb3482a9052b1b268abacde\",\n            \"96aa7b38a32a4f4db15ba6841c750fd8\",\n            \"9bca368a376542d2b60594c47050470d\",\n            \"4480a062b90d4d13b12c3ecc0b832d0e\",\n            \"3daf35d9c166402d94afcfd111b63807\",\n            \"44400a34234341f7a182d99ef8657dc8\",\n            \"a7f997badd8d47729dac8cd0aed205dc\",\n            \"57166e4e5c024e1cacc63f2bbed51560\",\n            \"0d8925b6952e4c6583b262124f28febc\",\n            \"26d72ee7eff34d74986f596cf9c6a557\",\n            \"5f9382acf893491ab582a7282edff80d\",\n            \"f7aff94ef61047cbaa55eeb098d205b5\",\n            \"45b8e4b9a2af4a72b08f0e2a3c78c63c\",\n            \"2385294782954692a455fb65e9b59733\",\n            \"1371bd3069e541d3b5e40813bac2c490\",\n            \"083f6c3c60ad4370b6587761b49e5654\",\n            \"8366012339cf4c1c8ef44ba3341df1b3\",\n            \"139901b773f141b196281de1c23f40df\",\n            \"7e0ae4a2ebe446b683e0f8be4a70dfd5\",\n            \"cc64b611043840ea9a6c1421d7327bb0\",\n            \"d687111284984c0da14ff9f534b53c96\",\n            \"d7ae79cdb87146729acac1b5f2f70263\",\n            \"fe2e1e6d423c46c19ca6abc4bc397860\",\n            \"2977c91d68ec48b09605472e2b46c62c\",\n            \"3642f2a6cbd541408e3a88745f597f38\",\n            \"756359b628f74b4ebac5392e09a03e83\",\n            \"af26a845a28e47a98a42c2344b20430d\",\n            \"559a87b3917a47738cd4d0172dc276cd\",\n            \"c41ee826c58a4542a435f41a8cf2ed1c\",\n            \"3e70a3c512a04c25a90217466f0b904f\",\n            
\"b2e03d1e18a546a3bf7edc942a9ce2ee\",\n            \"29fa4f908d07492387dc9f7607f55312\",\n            \"76dea45f667b40278e34647e6b6dbeb1\",\n            \"56649cdda2034e0189aec800c6d8f4af\",\n            \"4fe01ea1a4c04d2fa9851b9e515aa79a\",\n            \"e1851b9cf0124fa3b7d876c40244c61c\",\n            \"9560bb9113724753a768d58ca0367046\",\n            \"46403b1a813e4e9d96718ec9c85a5065\",\n            \"de120e4ae5d7455390945a2df5791743\",\n            \"92f04cfa9cd04e34aa7ef73499f0c62b\",\n            \"cb4576abb0e4409aa4125a1eb3612cd7\",\n            \"8ffb76c7b6ba46698dbf1c7dedfce105\",\n            \"974775d2cc954c9a83942ce57e263a95\",\n            \"7ebe4b354fc04b099ff8064700d034c4\",\n            \"171c629395f14252a0c9f0d52ddde7f6\",\n            \"6ffb160a906143fca91bd8a62736b966\",\n            \"20c4ed70df98425b878c8bc0aeb935b9\",\n            \"cf87f95a7c514168a260c0302dd05dc9\",\n            \"3bcad19a79064a7dbbabaabb0f1c8a9f\",\n            \"a4f10c97b95b4b19b2a9a02fbc60fa29\",\n            \"53fc8538b34c4c34809bbc7c75bfc029\",\n            \"f75760a7141245a0a8881e3d7476c877\",\n            \"87855b48d3aa41e7b38545aba53d71be\",\n            \"7ea2647caa7d4b14989e6a4f795b409b\",\n            \"a173bccd53974a248adb99dc1bd5b4d2\",\n            \"3f9845343c7f4ffea9f6640c7f0ec9ab\",\n            \"b3a0d237f6fe4b6dae958fef4765786d\",\n            \"428c0f8165604415af070ee916ffa02a\",\n            \"b427fd7a2fdc49388e34c1e390c13260\",\n            \"3527915a5473497b81ccbafafc6f7345\",\n            \"f5c40d1c5de943d4bce6939895d0ebf5\",\n            \"72e94f7605c8452081f13051fe4eaba1\",\n            \"0c2a80f722f54658b6d25079377b68ff\",\n            \"85165564577b4e3ea86d8e4113425133\",\n            \"07a067b7fe0049088cd2049382701467\",\n            \"9e6e8895372e450e9a9c926bd9d66bfc\",\n            \"3dc35e22727744768f2c113f9a5358a9\",\n            \"e97ff06782a24e8897c0f6b4acdc3579\",\n            \"cc5082778cd743b7b494485050473841\",\n            
\"97e06f7effa149d8afe85c153ede1d29\",\n            \"34283ecf8ccd4cf69c7292b120e2cb43\",\n            \"685bfd23604147978b3bb8ea1d758107\",\n            \"9646be0b72a44f4ba6cfc68f2a522028\",\n            \"2e523299ecbb4ef3adaa95f8dd2ab072\",\n            \"b1d721eb56d243bb865e90b61c4a9785\",\n            \"df1a778a5d254fd997e6aa992fa3d537\",\n            \"28e540c117fb4766a344a5a1a887115e\",\n            \"1cd4abd9506d4f329d7dcf58e7008ba7\",\n            \"6c5af425aaf44adea4e2027ba6062b9c\",\n            \"b9a91217395a4550a6012a06efd29e7c\",\n            \"f9c98867724e4dfda3b8918803c5bb1e\",\n            \"2c3a591792794cd98a2c7317c6f8dc7b\",\n            \"143568f550f34b1182a411497051308d\",\n            \"8e69d62d0dc14ce4b7b0269bf4e2eb78\",\n            \"5b22f2c813454e4787301c33dda5692f\",\n            \"02707d76344647ee9a5eb0e0205e2ff5\",\n            \"3f1781d7d05f4affbb0cf69a2d83cb76\",\n            \"5cc81942930d4d5fb039adaf004227c0\",\n            \"0c59932a3cdd4931a337d18558ac93f0\",\n            \"3a2bcd6c344b43b7bedddd4b56e833c8\",\n            \"16a6b4938772480d9813c36caf450514\",\n            \"b757c51a4db04c3aaaf2f229d25ce2b6\",\n            \"56ec1bada613446ca99bf8a9c4ab3d69\",\n            \"8619ac7034eb43d0ba5a05f80f080786\",\n            \"128e986f97fb4bdcb0c15b60499c35b2\",\n            \"9d3f40e1e44e46439fc8c1247704772e\",\n            \"f0f7a5aa0cf04bdfaedd20e83919bd50\",\n            \"220120f060f348b4ae9ff2beba7e5883\",\n            \"8559ddfaaaf140bd8b581cc7c3ed992b\",\n            \"ea5fb9415a3b40ae9f51354a1e2c37bb\",\n            \"e5127004f16d436cb7aa3f37fb4881f0\",\n            \"eb322e7aa135490c9da4c0c56a77e087\",\n            \"d30f71b6100d4227af6fc1b21eacd5fe\",\n            \"d36a9cd4315147329650fad25e46d671\",\n            \"55b6e0c2dbc3409e82e79427d639811f\",\n            \"174164ea94fa4188a5b2daa25533a006\",\n            \"243bcbec761c40c7906826ffdd9435fa\",\n            \"f734f41601204063aa733845a3b95d17\",\n            
\"567ad6f6c2e3403285dd164db46659d0\",\n            \"cc1c1c1104644aa381ce702aa24105b9\",\n            \"be1890dbda57429cb2738cea2183555c\",\n            \"678a8fc1687749209ea33c4a1026a23a\",\n            \"58db995285574441871ce58baedb5ab8\",\n            \"a5a438b2981a4b2d8405b9614f889193\",\n            \"82cd7bf0c798403c8cfcb32afb6cd0ae\",\n            \"e6f797672bea411fb4c4ca326c7edac8\",\n            \"86210c9300df4d8a90f06d5e6dd9507b\",\n            \"d56fb3801e574fe5825f946a06ce4f9c\",\n            \"83e5f8d407754c80873b032d2a57462b\",\n            \"f30db7c8e3dc4aa08f097b4b6e2bda68\",\n            \"3b8c1ba088d84e918d5f39ac1dce6bf0\",\n            \"352ce44060c54cdc99d2e5f199ab784b\",\n            \"06b274ac8797407483e6f1828dc059c5\",\n            \"ab515be396d54273a28e80f3251e80d9\",\n            \"c51ab3d1855e47d9b21ea3d8efd47d90\",\n            \"32e975af7d5e47c38cdb539fbf64daa6\",\n            \"447d1f1c84244df380ca5dcdecc6a131\",\n            \"c5b9cb57416b41e8ba8947a4791e00cf\",\n            \"73ebd68bc7294d10ad7cac722f8230b8\",\n            \"14fb72ac2ec64dc2bdd1481765e8f426\",\n            \"e2c4a11b7b3f4239b776c0b9091567e0\",\n            \"488cbe1b8cd94a5b831ec25fab67c68a\",\n            \"bf82f6397c954c87bb004dca5a941d8a\",\n            \"c1fbab7d64b745518eabc5294ed65ef8\",\n            \"eec2597f1d79458bbb94fcb6ecbc673b\",\n            \"2a9908e7feeb447693f190da28b0bd9b\",\n            \"23986ef9fd874b10bb036bfd112f78ae\",\n            \"b1b481d7010b4452b280b9880919cd80\",\n            \"a8509d4279d74b92a03f7925e98dba2a\",\n            \"47650d294e214c42993a3027ac819d8f\",\n            \"31cd2c4b1f4a4a6fa287151c16fb9349\",\n            \"7d587694fe524e9f8b77567e0c931bc1\",\n            \"ca59d3a8b8184bdfb7259d35d9b71930\",\n            \"e404b962e80d43bf81a327a43f82c7bd\",\n            \"6f251c68c4564c59aefc458c64fce56a\",\n            \"a5e06fc3e2e943fea0afd92e1c587c33\",\n            \"bd429abc607b4103b688f5b7d581d1b2\",\n            
\"8f6c5dd15d8949619fc9b2092d8ca946\",\n            \"5798a31bd44348d89d6379298e6df154\",\n            \"6e4430f78d35481fb307936e75d77216\",\n            \"01572529c80b4488b23ffd03a376338c\",\n            \"0ae2cfed5fdd4d5896dff1167ddef45d\",\n            \"422f825587034ebc8aed3fce5585f412\",\n            \"0937591b34e740838cd8208979a72e32\",\n            \"ab36702aaef44cb2ad970cfc9a00503e\",\n            \"c5f282668f8349e8901d22400cdc0103\",\n            \"1b561afcdee044d6b13e0266efd4b482\",\n            \"a46412f1a5ea44ee90483d1527f43cfe\",\n            \"6c30ed71c9f74c1a8a52ae4db40b2e67\",\n            \"3d4ca54f279544ffbdcb3f649e42c443\",\n            \"020a32c6a9bf4b979242c3129c2981b1\",\n            \"13143288627d438c9f9b451645fd80d0\",\n            \"ebea7e0a5a124933adde20b3b0112c8a\",\n            \"5307f143b29d451887f51ab83d4760ed\",\n            \"ed3fd7343b514183862d96c2f162acf2\",\n            \"658875ca73d442a89d95e0c342e0165a\",\n            \"edc21d10e8ce4f5bbb1903e52b10db99\",\n            \"5aabc69aac1443468bf163d42b258888\",\n            \"5e678d94d0c24e779f8595ebbfe2a4b9\",\n            \"cd4fa09afe2947f9b56c871f5ab7674a\",\n            \"c1f37b729da34e5195da3daecd558183\",\n            \"2794d7e2ef1543d896339f3df453c693\",\n            \"313f047cd9a94d628562f71d1b50ade2\",\n            \"c235550064204eeeb399e5f5785bd89e\",\n            \"1ce4fe592a8943d582dab3a429fee0ac\",\n            \"e2eb46ae412d42618be3e983e7774a6f\",\n            \"0d8b16cb262743c38aebfaf59d76ad72\",\n            \"53bc1c3d484c4ef7a0402e871709467c\",\n            \"ef0f33fefcbd416dbe7ec1d66747b277\",\n            \"61d3c77502ba484d988909392a1b0cde\",\n            \"ce622a736e674dd8bbb44bb3770edeec\",\n            \"11a304faca7c485f872cd2960cb08a70\",\n            \"5f41dd1d1f0d4993a7a7546fca2c5fa5\",\n            \"44ef7387ed4b431994877f3bf685bae9\",\n            \"9afe2fe999934ea88c3e9f818c12b736\",\n            \"b1c4b8a4449846fb8c973c50f9135c39\",\n            
\"7a6019f79e014d3f9aad568657f57ab7\",\n            \"1353871fbe8248b5baf2064c6c9a8523\",\n            \"c2abddd737ea45ee83fcaa7da402ba64\",\n            \"4ddf5968860944f9a98cb86c87e50475\",\n            \"11694f04f8d54aabbe88a5191d6ed7a1\",\n            \"809aa3334f9a4f3ebaac51fa1e3c6a71\",\n            \"8dfe08e9fe9b4f8db6af7a0daaab34d5\",\n            \"0ee484c157854d51a56a23fedf0e8c01\",\n            \"63b801bdfdbc4039b60ae3eb70ea7878\",\n            \"52e71e07f9ab499680b39e651c01299e\",\n            \"b690c09f1f354ed2b9a92ec9e924aeef\",\n            \"1be403b93c304f93806fc197b9f213bf\",\n            \"c033a3fc865c465b91ca59b731317e6f\",\n            \"c8f72bf608ec46e891f5bb577cb35c04\",\n            \"b759a9dba3c54819be1bc3a3dfcbfb36\",\n            \"709f5a80e82d4509abccac058187969e\",\n            \"eead49138651461b99b62a5f2d9a58e7\",\n            \"5329b729ec8341c393943bee5b8a6c59\",\n            \"7777b18fdd9a45469df47fcd8bf5752e\",\n            \"0a1ad847df9548128812430ac683c948\",\n            \"c0957f187b654dfbb6606e777eedbbaf\",\n            \"f3e5142b4e8f493684ad9c25af192925\",\n            \"d9540d78686741e0ab500c7b7ff13e44\",\n            \"baed6664b673437a8934f51e5ef94cad\",\n            \"55110aebdaa0456f962b97de322088af\",\n            \"746e1b660217449b9d79719b3b982ef5\",\n            \"1a93d95c6b6447c186cf30afa84deea7\",\n            \"71da713626974bd8b960702e0552a088\",\n            \"b6f4796f219b4f4ea1a048fd6e0eb1c8\",\n            \"be54b5fb20014aa79f365b56938dda29\",\n            \"d67c72b21291410a88d3921ca8727f50\",\n            \"004efb3a49ea4f6489d364fd791ca61c\",\n            \"607555489e2a449283423e9b092a3967\",\n            \"da4c8b8065f040d89f016dcae293e5e8\",\n            \"c33226c7265e407aad5b3acd4098ae42\",\n            \"755655b98aae4792879fb5fe8c76632d\",\n            \"7d7cde351a464875916d936998420a15\",\n            \"d03638361cad4b98960d9b28b956381a\",\n            \"59caf02d181341ea81b1e0d7f3f63fab\",\n            
\"1086211808ff4b88bfbec7bb831696af\",\n            \"84d00f6bce284d2dbcb8ffcb17aba143\",\n            \"2fd9fb963f5f47d1acfcc75e5c53d97c\",\n            \"7089e6d90c2342008d6a4a7455ffd156\",\n            \"7179091d1a3a41f1842cb20f9e2b0063\",\n            \"c8a1c8ff762a4a41bf700a777cee83d7\",\n            \"788c219f0bd44a2ea9bf4b5c34fc9e8c\",\n            \"0b240c99432940bfb405b267f04c471f\",\n            \"265a26354cbf411b816f49a1d4f1a16f\",\n            \"88a71774ab1b408fbdd0cc80815de782\",\n            \"6524a46e508d4820a0d3d8a7d2231301\",\n            \"1c4fb40f1e4d4689a4f56845ca76c083\",\n            \"1c4e665173a64b60b16b325238617bfe\",\n            \"01177211c6e942bb99910e3a4450b447\",\n            \"d9b81389512248eda61573b451e18c96\",\n            \"864877c73f13402c82a76b089090ff1a\",\n            \"60a24e88c8014e28bfc75db147f8d7cd\",\n            \"6ab3a5774e7b45dfb178b61f8f7971b6\",\n            \"5a28c8947e7347e4a77e4cd105f3821f\",\n            \"66b9917a04cd414b90ad732da43d2ebf\",\n            \"3a92e30dd87048849d978024d17adfae\",\n            \"b64cbb7874a94dc8811fde3ce5064823\",\n            \"3abe17ab46ab454190e0ab5009581190\",\n            \"9ee1f5d6bcba4a22a4646c769f26f2ae\",\n            \"2b44a7cc5d8d4c509dd9179960f0d4fb\",\n            \"37d3b49f317343899d0a001b708c8289\",\n            \"bd2c8ad252f04847b3bcb7948d4adb0d\",\n            \"d63b16952fbc49d3b1c0fe232863c7d7\",\n            \"a018be650f764b9eb0157bfdeb46f525\",\n            \"ae2d8a80559546c9873bc14ab9a82f3b\",\n            \"817b79fe562b4d1787cbcd81dcb71b8b\",\n            \"4213af025f004d17b900e22da2f502a2\",\n            \"717ac9d4a7094de0aa740e9dae45c972\",\n            \"51495d20a38044d3aec0e5c1010d716f\",\n            \"6f7da3f317d842cc9a9edc6417a440cc\",\n            \"30dc330e207c4186857d9665a1ed76f8\",\n            \"5cf1795eb29944c08f36c9c606e67977\",\n            \"bb53ce4664e84e8dbce58cc5316cf3b4\",\n            \"2cce29639a2a431082bb1ae161568abb\",\n            
\"f485d9d1c2464655883739e79e5b8ae9\",\n            \"c3b4976a88434ec4a18b5fc44b7f86c9\",\n            \"96eceb46444842a9b43069ef22844828\",\n            \"3d99b78943fa4242963eda194503a421\",\n            \"33ce9e2a338a48ad8f4b607e23c3ba23\",\n            \"ab81662b419742dbb262346d48fb8a45\",\n            \"3624a588cd2d4a67a7e725b1a76a0377\",\n            \"951e087f4970473f99e8eb0d795f5807\",\n            \"cfe68ac339a44c2da96f59660ddda2a6\",\n            \"4cb06609ca684cd688d40ec30c5d7efa\",\n            \"844a92d2c77f4a588d2ff19ec0bd0574\",\n            \"b50f27ca4b4647f89ebfabb783d311d9\",\n            \"97e506e771a944e38ae6e847bf2c80a2\",\n            \"c4cd9b2090a84ca797adc81e6b155201\",\n            \"d36db0d2c3f1436c80a705c2573a884c\",\n            \"33eed28c85dc4fff8ed3ca549a72b250\",\n            \"f4a50369c17d41ab8ee65836e9f409cc\",\n            \"e689784e942341b799809d663277e3a6\",\n            \"25803c5e86a8436caa0a3b7341d6b24f\",\n            \"c81db61b545d49979d8df3b35f2d2337\",\n            \"988bf6ea04e24f419e2c55dac25c7b92\",\n            \"ab0b1734f4fa4cb8be00a3fd2c422023\",\n            \"37574cd071fe49018b9cd729410f78d6\",\n            \"404126188aa8471fbc975c1292c61177\",\n            \"964f9665564a405f8383779f462809a3\",\n            \"593fd457a0a647f1ad23772663455075\",\n            \"1c562ef279d7496594b3b0f5e191b2ca\",\n            \"02af81a3890a4eb69376481dddb62757\",\n            \"4dcdb8b843ff47c092ee7c812d4820e2\",\n            \"5d5df9b5f06b4679818e5f2e3b69bb3a\",\n            \"9d7c0006eba24d48a9a79021a72cab14\",\n            \"f5f7ca70572641ee9c3303ff17dc19cf\",\n            \"c4c4a8e92e41410bad01974c0516fd5a\",\n            \"21503fdc35034377b332127cff47d2cf\",\n            \"11b2774b38944055b16d24f47f715bf1\",\n            \"55b72408c4714609ba5dad380c0107da\",\n            \"3b38453d82f94ba3bb2c952e4cf3203b\",\n            \"7152e9a878e94615b1a20f37f31ec8a3\",\n            \"adcce8e6fcc74ccb9081c532dabf7954\",\n            
\"5b1166971deb41afa3f6710e22dda622\",\n            \"c071eac4a95d42f48cc72ece1a2d3a2e\",\n            \"e2ea4c6c32b549fcb79c04b1576b6b6e\",\n            \"9dce8e6885404bef8e13951ebaaf2810\",\n            \"35430f86dc0b4b98b0587ab5ffb3c445\",\n            \"210fc3b7a05346e29d5b47b3bd62e96d\",\n            \"b13a851953974020a7e78ebf80ec39f9\",\n            \"5f6043547d8b4e9790d614789c0bfc4d\",\n            \"0a0d5615ab01469c9bea66ed9409e855\",\n            \"d7873eccfc1f4a5e934c8061168b7ff9\",\n            \"26016a9811154ebfa85d7537936c68e4\",\n            \"52f118afe6a74797b89fc1aa868de358\",\n            \"60591bf7bf9f4052a4a62c548cb0f7d3\",\n            \"a314e4f42e34465fa53e48c7fa46a859\",\n            \"6c2a653d96064901a94cb8705ef5ba67\",\n            \"0c637202da564f46ad98f8aa9c6e94b2\",\n            \"aa836e70e66642d6bb1f7fbcf7482cf2\",\n            \"b14bcb5b3cd440f4955ab25fea1beef3\",\n            \"6562fcd1e25b4522801f604ac8553148\",\n            \"fef01075c3e74c62bd92e8a668b88f90\",\n            \"3c90ae0909df4199a1555fd7b8c9980c\",\n            \"f5f1e45fac7445899e5678cea2ad0190\",\n            \"752a96af39264fcda7f772bf13a386c0\",\n            \"2c954ee5db5f4bad80dc54ca8f3fa634\",\n            \"e5a9f048600549f886af24bfc7df7ad8\",\n            \"971c54165ca3468cbdd0f0f5735cb690\",\n            \"f8c9ae1c8056424a9582c3c7b5528bcc\",\n            \"b4342b8efe7442478188cfeca4e65b80\",\n            \"793715ef171e4a91bb3607a0b7b00143\",\n            \"858c83c823cc41be9013e760012cb676\",\n            \"03659d1840034d76845f786268a2aef3\",\n            \"1ba58ae904fc48079e86646936816330\",\n            \"b48770c25a274ce4aeeb48616c29c53e\",\n            \"1a9840e017a74ea3bd556806fb1bde61\",\n            \"b95d0160c569437d9f3ffdcd0d83fce5\",\n            \"2f980bda91d942ea8972ca21c7998103\",\n            \"0553b6dc9a09484ea0cc61cb70649748\",\n            \"4b48da34bde344ea93bc717e62b4bbb8\",\n            \"3f4c9e0fc4ed4c24821fa24e61a98b9e\",\n            
\"675cae1fc5f942d68f1704785f805831\",\n            \"436a3cd81bf844f89d10a861ad763192\",\n            \"7fdce14fef024cc190e5a8c8d5e73c7c\",\n            \"b03e33cba1a542758eb0ca4ad3daf3c2\",\n            \"1b3188144e7e435a9dd6317915bb8d74\",\n            \"bd9ac4d8be254686a25a796534e30270\",\n            \"e241026e90734fab9fdedb5357a8153f\",\n            \"88d5307a7bc04acda9ab001d7d6ab70a\",\n            \"8e85eb7198fa44fdaf7401ec5a4b015a\",\n            \"41da66fa34144359a7049ca48632af21\",\n            \"ae3961cd33e544c1845602589d41d0e7\",\n            \"98184a0c8eef4d5fb3665a9a07441dc6\",\n            \"e331cb31fed345b5bcc3151086bc051c\",\n            \"1645c00f9d1d44f6b3b590c53cfe7b97\",\n            \"841a63cb8294465d974f241c7e966d59\",\n            \"f98778ffd4874d37a72db041a751de17\",\n            \"224069f59623449fbb0e224837522d2c\",\n            \"77c2673577a34c3387f8dbbcafc99fae\",\n            \"855cf3add41741868e713d0302acf560\",\n            \"ffbd499631cd4b05b5aef6c0e7321cdb\",\n            \"550a5e9cf8be48f58205e8e8c7c0ae56\",\n            \"89538980a0f7469ebba49444fb8da63a\",\n            \"89ee0bd3c1e24b6d8863abe82a1ebc58\",\n            \"bcff745430fb41839e40f650e7b14cba\",\n            \"6a54953f28a54ae18c0d4aaa5cb0387e\",\n            \"b96d793a9ca54cd190cafb8cf6e470f2\",\n            \"91636dd7dca143c19b62297a2fa5ce00\",\n            \"f33334ceba1c4246a189fbfd111f3068\",\n            \"9900afd61ab641a895ffdc8a10e6496b\",\n            \"955b0e93751d48809ef95d419453914e\",\n            \"136982307cb04bbabfa01a4de219c62a\",\n            \"e99d1fcdc87b4f74a607cc98bca5a398\",\n            \"047daa8310e44e63b19aa564b2839837\",\n            \"3412f43e49484b3986c88ac669be1901\",\n            \"ff7aef963ee0491ab49fc4bd3e0976b5\",\n            \"882a1c11c929464aa93963025a5a00ce\",\n            \"b4b53b4f4fce468f98c3ac047385f78c\",\n            \"9476364cea034e12ac3f29e6ca2ba557\",\n            \"b420f4a0289345d98c491baf2990e5e9\",\n            
\"12ecb306485a4f7b85d62344460f8bfd\",\n            \"65546be37586428a824de17d1c038b25\",\n            \"6e05196caf734006ba4e3c6d7baa8450\",\n            \"cb56376b773047d8a7adf71f72c920ae\",\n            \"d73316ef8c484c2887cc8e307a1b570e\",\n            \"75fde083f37d4e318992d56d9778873b\",\n            \"9ded2515a84d4ba78e44c1c2f5acc58d\",\n            \"ccf04eb1a35d4440b403faf00c7407f8\",\n            \"178feebf2c524f989e9943c4e30abcf7\",\n            \"9498c899ff794d0da03970dd88bb442c\",\n            \"08c234ba5a4541f88020dc79157560dd\",\n            \"d2b8b267ed4b4ab0a8ed8199836a91f7\",\n            \"fa2de2fb3dde4f72ac3228bcdccd4e01\",\n            \"4b420c08ff2a498681e8dd59f9f2ac2c\",\n            \"7cc4d367c20a4b04b54694dfcf170f74\",\n            \"3bbf51b944d94f1bb6a9be1e0b226309\",\n            \"2a5437d5d93d49109fe74fd03c39409e\",\n            \"59a2f6ee62014f56b017bdd9f8f0b57f\",\n            \"5fb5c37cbdad458cb7c0d7ee2c9328ff\",\n            \"92631b29e6854d708d702cfa0a18ac7e\",\n            \"6806ee5ea0a94590923866fc5801e857\",\n            \"682ebeeedfe84274b4af85ff1d2650e7\",\n            \"4b4cff80d741452caca7a622f6d8880a\",\n            \"6efee891674a4493aeb4361d118f60bf\",\n            \"9157207c151a424cb78115c8549df716\",\n            \"0d92b2b2bda9459791212d5361b478ae\",\n            \"e08595a84b36432d8bdd0fdfa9eae05c\",\n            \"f59ba68fd3264f94b198b94ae5da3463\",\n            \"5120030a5fb54687bd6e1ad4647682ad\",\n            \"ec3b956f469342f4838eee8b127a5eee\",\n            \"75993d21834f4bfe88680cce6ef45920\",\n            \"09d71d9e66344b3188f766bebb5c3181\",\n            \"901f923c7eac4576b8ebfa37b228abe1\",\n            \"c893830e66c64d639169f274f3a6e133\",\n            \"8dc64a19d272401c99a2a5cd03bcced8\",\n            \"c51ea2b528d54086b8ba59f1c244b1b6\",\n            \"e4bb32bdee5442caa564425b9d722ab1\",\n            \"88c728cdba7d45418b5508b5b017c119\",\n            \"b97d12a1ad1641f5b2ed3fe75d239de7\",\n            
\"507d78d4c25c4be39c7e8a849e21f637\",\n            \"762557b63ff8434482e3f10210a5cca2\",\n            \"0bd4726d64bb40cfa7de9a19a6e56c2b\",\n            \"e26247ae7b8d468487248f2c8c3b1a42\",\n            \"e6d8198c49824287933934dd1e7a2ad1\",\n            \"98d5c0c40bbc4778b72b932d2b8962f2\",\n            \"1a17f81cf890444c8cf494b36bb215d9\",\n            \"e0cf236c7703460389083200a41fd6ec\",\n            \"0a8295c3cadd45e2a93ac9b2741c07f1\",\n            \"f521455989c14fa681987a9cf5b750e6\",\n            \"02b06c57c93c4e1891801bd89a712f94\",\n            \"32277f46720b427291677a655535895a\",\n            \"fa79c90a3e154e62b05b771cf6b1c1dd\",\n            \"d1b5d9ca7b66455690b60923e5846eb2\",\n            \"9358e1494bad4f779a34df60681c8755\",\n            \"54e9d511e8d649e4b75d92f9a9ecef25\",\n            \"d4ebcc4829b3441d80bf0813fed8262c\",\n            \"0b9e6b44680040859cda9ad0592f1a8b\",\n            \"04f673c3a73b4b7e875401c78805e400\",\n            \"8a58aa290dd84c0bbd725fb1456265c8\",\n            \"15de375958db4c58bc4a57c02a04b995\",\n            \"a21dd3e4bdb74d319696691ba96cb8b4\",\n            \"16490e85dfba4ef49e784388a5a9a2e4\",\n            \"594e5e9090e04f88ba535dff0b265b5d\",\n            \"42bbe9b70d384746969b749ba9f35765\",\n            \"de6f4a7cb67e42df99dcad5ded846501\",\n            \"e892e00bba3549e09f450ae42eed7896\",\n            \"9ebd056764584729899cf61e36782393\",\n            \"8c08a645c5ff49558146192982c25ae3\",\n            \"e7ca8628c6324408a1c2362c8b7c07ec\",\n            \"c3a7ad5e55d14594b0dc127f7eae2412\",\n            \"7c8dd9e21fd643919e6085d99be6a742\",\n            \"00405508e43f4c4db6cd1be27d643648\",\n            \"6fa62c48d7c94129b9d1e416eeea75a4\",\n            \"6fd1c98b77c545fcac9847e82140ca72\",\n            \"9d5ab6c4afb44c86bea8ab38b6514cfc\",\n            \"8326df60d2cc4c7a8e02781ed41b9e1e\",\n            \"6767da18ce8d4d86b0e7b8fe96e393c2\",\n            \"be3a2bc3a39441a4a74e7e99a83478cd\",\n            
\"c776d1747d6b4510aa8b98ea67bcb58d\",\n            \"e8cf552c2f0345e8a3f7c87c3bedb52e\",\n            \"4735b5f44bf9479380c6589b86998bd3\",\n            \"eeae561c0d104ffbb1409d710a3531e3\",\n            \"7d8faf8f3efe415bbac6e97014265ced\",\n            \"2df77d291c494ee4b2d12f316a2ad7bf\",\n            \"3be17dbae47f49c2a3fc9cbf1ee125fc\",\n            \"fad45c9d935b447689da4fda9b8fb10b\",\n            \"8b63b5047f2740f88759be1a6ef4667b\",\n            \"16032d320d5f4ee1b2283a5954bb4b01\",\n            \"3f87b2f361eb4497ae9667b675de3608\",\n            \"26af4445311a4e8a9676070a80aa468f\",\n            \"ba13004801c44455b421c79634d1b5a0\",\n            \"eb0245a2bb6b48bcaffc28095eb5a321\",\n            \"8286251efcdc4ffe905e8a6e73b618fd\",\n            \"08d35063dfba4f8e90053252d7d01d62\",\n            \"8a42c0b87a6749bba063a793633dcade\",\n            \"289ea8304f7f4344aded557fc725bd0b\",\n            \"f4fea9a4da374008996af29ca366daaf\",\n            \"6d52448d893441b8bbda8420932a349e\",\n            \"373bdacc2f424f358e0d8d1125d001cb\",\n            \"64aeae3ef2cb4df69a7d7b28a49352f8\",\n            \"9fb436aa8a124a468cfeee45e677a284\",\n            \"c1b72c20a3c94ac7af6c51b975a92daf\",\n            \"5aa2db30846243aaa9aa097aa3069e18\",\n            \"a769b7be45044bc29e352ffd4c8eb4c7\",\n            \"fd958bc26b4047beb5efcf60d4c4005d\",\n            \"bfed6ea906d84bba9eeaa9acd9478257\",\n            \"405b50a6908b429db31164eebe2c3185\",\n            \"29ed63e571054a6da7f7e3c22d778162\",\n            \"6974cdb61fe64bc3b41bb896b9f2cf6e\",\n            \"8de2ddb8f2a547719b47372dd3b1b401\",\n            \"93197b59727441b48f30888a957856a7\",\n            \"0cafddc8dbe846e9ad1f7566ac3cb391\",\n            \"fc127c01ec8f402ba250bfeadbad24af\",\n            \"9a1e6aa74018404ea3b8d9dbd6c68318\",\n            \"186866f1963945299d60c7fa9736a8d9\",\n            \"b53909d4c63c4f2da40a6f74f8720d38\",\n            \"b05467bdb6f745c69d15916d72cb454b\",\n            
\"905e18faa37243359a3f477dc04297dd\",\n            \"67cb5f74be6f42ed8c5757a979dc73c7\",\n            \"f66ea56a983244f5b6e09c2f1e3b41f7\",\n            \"92d21c6e1ec74f6bb7099bf0bed8fd6a\",\n            \"d905b494f26f4b14bf310fa608e7b183\",\n            \"f6e0185d77824451a98cf74b5e130b9c\",\n            \"2c910835d7af4fea90519bd6c2462a05\",\n            \"1e188454444348bc81d917c7affb99bc\",\n            \"dc9757d4e8274e268993eaa959f56093\",\n            \"6667e6a6c8654ab2861bb3a3704aeb19\",\n            \"2d007adbcfd644f3a730295d5837e489\",\n            \"c4d99ebbb7534c50b49cba41018b7092\",\n            \"fe22e1525cbf43a2901eab928ec1e71b\",\n            \"78d6e3f93f474285bab153571e758b1f\",\n            \"7ba3f9e9e0284937bd83c50bc398b508\",\n            \"c2999e626bac4080b7576ebad1ad7045\",\n            \"98622ba516af4606b4fe2a8a88b56ca1\",\n            \"79871994a3a6416a9aa681c0dd0f98ae\",\n            \"0bb7e4c23db34b61871e58c820609338\",\n            \"a908479ac551494a9e13334ac6bfc0fd\",\n            \"5399a856774945c8983fb31c9d853add\",\n            \"0dec4c617e1b45f99ed431296b6803e3\",\n            \"7db935abf7094f559660d4d8a6858e89\",\n            \"fc4fd46da5a24613ba52d4b5344ac3ec\",\n            \"e30331900ae44f0fa0aad105ac467d27\",\n            \"63f0b67181414e31b761d3bc58f2c2e4\",\n            \"0b35ba774af84c13b00b06ca03cb145f\",\n            \"193d4f7dfaa841c48854648a494321bf\",\n            \"3340576d3b57404599fcc08deebac3b8\",\n            \"4e1516fd31df4892a8a29d7e359866a2\",\n            \"d2bc4fa779784e95b5d18a2f88272996\",\n            \"fb14afd7402e4af5ab1a66239b5f50cc\",\n            \"813b1bfdac4842828fc929b884d6348b\",\n            \"7612dbc295f64405b74b907230e727b3\",\n            \"f272074531e0421391f1a1a214d912e7\",\n            \"ca92f2b98b3646f294cec546eaceac90\",\n            \"45b0889f87ad4e689c6cbdb1790a6aba\",\n            \"f68a9c9a98e44336a8bb5557a7c18444\",\n            \"a3b1e0fadb794e46b581890a346eeec6\",\n            
\"6dd791d0e0a64f8f8b69e83434088c7b\",\n            \"d4c33048bb25493fbc8c04331812dd0a\",\n            \"3645e270ed33453aa0f3e7e563bac6a1\",\n            \"ca7e00c2965449d7a2acbf320736c3ae\",\n            \"d4248737387f4344b157e955a6383d82\",\n            \"7e870beb473c4a3ea9fcf4f2c1495d09\",\n            \"eadd0ad10ef14199b4eabcfc74f84efc\",\n            \"1af186c96a144b18af38ee964121a184\",\n            \"68cea25fe0e74326a4d74a16fff5aa41\",\n            \"961d0f2d17324effa4289951a481062d\",\n            \"abb70d1655c04f63a4f913df934c6566\",\n            \"61e7833fb298417fb3453af2813a99f9\",\n            \"61496bd04431439ea0bfe37af1803916\",\n            \"924ac6ca25864079af5e07234fb074aa\",\n            \"7261d8a4fdce4034a778989e9801a1f3\",\n            \"fd3c12a07a274ede8098c306685949fe\",\n            \"880fd220af564e67b1251283902157e5\",\n            \"7c1dcef2827749b2aa270ca386b33756\",\n            \"f5b83a9a7ead48829887a53b950870a6\",\n            \"03fc8ba5edc842b58e94ca9de9c926d4\",\n            \"fe79a709c85841b98e5ee72ff91990ef\",\n            \"1cb3356f90394ca5b69edccac816827e\",\n            \"a718f32db99f4e809e1c51c58eef8268\",\n            \"254889affd2e4f139720740e709a289b\",\n            \"e1ffb78ae02f4ddeb2b37dd7f31895e8\",\n            \"ab3dd37580874852b0800d0c87793bfd\",\n            \"882daeceef624f3bbf52d2c8cbc021eb\",\n            \"faa3b04c548e44f08a90f845086cba54\",\n            \"6cca6a67d7bc4b5bac7e6b8ae120659f\",\n            \"9c6f544bb6da4252a59965df8e62075d\",\n            \"712bb50c8a154532a2a99a569c02ed01\",\n            \"79ecf447e45f4056bc8b11a96d1735a2\",\n            \"e173c5176713482c921f7761d7781fd5\",\n            \"d3a78bb198174512b362b18c4464e8a7\",\n            \"5196839e802d44bab597bcdcd11cf796\",\n            \"baff9ad8a46546fe99245d1e7069518b\",\n            \"f8c35ac29d2f44bea6e5b2fc077afe90\",\n            \"e41f5054b2f5427181000dc84b0092de\",\n            \"1e06550a06f14dce998278f321a9eec3\",\n            
\"b6c0c923b50c4d10a7ca77717c2e6629\",\n            \"ea4263560a734d388c028f0b3fdec4a1\",\n            \"9e361f6a71e34b2193da9a80886c7622\",\n            \"725b833e25a84c569779d6917dc840d6\",\n            \"d74e3baaaf164a5babcbe6590954be6f\",\n            \"672b9c36a7a6489eb6cc7788e604ec7c\",\n            \"e0f0899196654747b9cfdd79bdb2d125\",\n            \"d9aee99605884feea4739c62cf066029\",\n            \"d1e85534930f4eeda07099642b07f816\",\n            \"3b9399e715e34c6eadcc203fda73a886\",\n            \"9b56dad3aab74dfb90245fa5a4e0215d\",\n            \"fef6669cd33a4ad7b15704f6fdeacf22\",\n            \"706cd5f292a442729607c3689cde97ff\",\n            \"d63cb79240b44890924db96953e29351\",\n            \"f48581b1364648798ded66e5300d22e5\",\n            \"171ff31740b74177b198115497b8b42d\",\n            \"07eb591d0c4d4dc8a3a6f9452b7294ad\",\n            \"6cf056fa03c4450d8804916ad08aa64b\",\n            \"8c2290c4a29c4aa78ade41dd6cbec5e6\",\n            \"9ff0bdcf66804998836e0ece1e2978e5\",\n            \"c2b4e4345bd94f6780718e26cfeef0c8\",\n            \"9a692c5494ab444eacf25307ad0b44a6\"\n          ]\n        },\n        \"outputId\": \"c49a8cfe-3001-4244-dfdd-bfd58869e20b\"\n      },\n      \"execution_count\": null,\n      \"outputs\": [\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)okenizer_config.json:   0%|          | 0.00/156 [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"4dda8029a1c54f9dac38834fc49d12b7\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.08M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"3daf35d9c166402d94afcfd111b63807\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)olve/main/merges.txt:   0%|          | 0.00/457k [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"083f6c3c60ad4370b6587761b49e5654\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"af26a845a28e47a98a42c2344b20430d\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)cial_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"46403b1a813e4e9d96718ec9c85a5065\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)lve/main/config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"3bcad19a79064a7dbbabaabb0f1c8a9f\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)model.bin.index.json:   0%|          | 0.00/57.7k [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"3527915a5473497b81ccbafafc6f7345\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading shards:   0%|          | 0/46 [00:00<?, ?it/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"34283ecf8ccd4cf69c7292b120e2cb43\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00001-of-00046.bin:   0%|          | 0.00/926M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"2c3a591792794cd98a2c7317c6f8dc7b\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00002-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"56ec1bada613446ca99bf8a9c4ab3d69\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00003-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"d36a9cd4315147329650fad25e46d671\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00004-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"82cd7bf0c798403c8cfcb32afb6cd0ae\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00005-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"32e975af7d5e47c38cdb539fbf64daa6\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00006-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"23986ef9fd874b10bb036bfd112f78ae\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00007-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"8f6c5dd15d8949619fc9b2092d8ca946\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00008-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"6c30ed71c9f74c1a8a52ae4db40b2e67\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00009-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"cd4fa09afe2947f9b56c871f5ab7674a\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00010-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"ce622a736e674dd8bbb44bb3770edeec\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00011-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"809aa3334f9a4f3ebaac51fa1e3c6a71\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00012-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"eead49138651461b99b62a5f2d9a58e7\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00013-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"71da713626974bd8b960702e0552a088\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00014-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"59caf02d181341ea81b1e0d7f3f63fab\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00015-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"6524a46e508d4820a0d3d8a7d2231301\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00016-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"b64cbb7874a94dc8811fde3ce5064823\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00017-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"717ac9d4a7094de0aa740e9dae45c972\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00018-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"33ce9e2a338a48ad8f4b607e23c3ba23\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00019-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"33eed28c85dc4fff8ed3ca549a72b250\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00020-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"1c562ef279d7496594b3b0f5e191b2ca\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00021-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"7152e9a878e94615b1a20f37f31ec8a3\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00022-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"d7873eccfc1f4a5e934c8061168b7ff9\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00023-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"3c90ae0909df4199a1555fd7b8c9980c\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00024-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"1ba58ae904fc48079e86646936816330\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00025-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"b03e33cba1a542758eb0ca4ad3daf3c2\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00026-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"841a63cb8294465d974f241c7e966d59\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00027-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"b96d793a9ca54cd190cafb8cf6e470f2\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00028-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"b4b53b4f4fce468f98c3ac047385f78c\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00029-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"178feebf2c524f989e9943c4e30abcf7\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00030-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"92631b29e6854d708d702cfa0a18ac7e\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00031-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"75993d21834f4bfe88680cce6ef45920\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00032-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"0bd4726d64bb40cfa7de9a19a6e56c2b\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00033-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"d1b5d9ca7b66455690b60923e5846eb2\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00034-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"42bbe9b70d384746969b749ba9f35765\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00035-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"9d5ab6c4afb44c86bea8ab38b6514cfc\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00036-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"fad45c9d935b447689da4fda9b8fb10b\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00037-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"f4fea9a4da374008996af29ca366daaf\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00038-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"29ed63e571054a6da7f7e3c22d778162\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00039-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"67cb5f74be6f42ed8c5757a979dc73c7\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00040-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"fe22e1525cbf43a2901eab928ec1e71b\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00041-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"fc4fd46da5a24613ba52d4b5344ac3ec\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00042-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"f272074531e0421391f1a1a214d912e7\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00043-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"eadd0ad10ef14199b4eabcfc74f84efc\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00044-of-00046.bin:   0%|          | 0.00/910M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"7c1dcef2827749b2aa270ca386b33756\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00045-of-00046.bin:   0%|          | 0.00/604M [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"6cca6a67d7bc4b5bac7e6b8ae120659f\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading (…)l-00046-of-00046.bin:   0%|          | 0.00/620M [00:00<?, ?B/s]\"\n            ],\n            
\"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"b6c0c923b50c4d10a7ca77717c2e6629\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"stream\",\n          \"name\": \"stdout\",\n          \"text\": [\n            \"\\n\",\n            \"===================================BUG REPORT===================================\\n\",\n            \"Welcome to bitsandbytes. For bug reports, please run\\n\",\n            \"\\n\",\n            \"python -m bitsandbytes\\n\",\n            \"\\n\",\n            \" and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\\n\",\n            \"================================================================================\\n\",\n            \"bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so\\n\"\n          ]\n        },\n        {\n          \"output_type\": \"stream\",\n          \"name\": \"stderr\",\n          \"text\": [\n            \"/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: /usr/lib64-nvidia did not contain ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] as expected! 
Searching further paths...\\n\",\n            \"  warn(msg)\\n\",\n            \"/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/sys/fs/cgroup/memory.events /var/colab/cgroup/jupyter-children/memory.events')}\\n\",\n            \"  warn(msg)\\n\",\n            \"/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('//172.28.0.1'), PosixPath('8013'), PosixPath('http')}\\n\",\n            \"  warn(msg)\\n\",\n            \"/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('//colab.research.google.com/tun/m/cc48301118ce562b961b3c22d803539adc1e0c19/gpu-t4-s-w28wpkkwcnrv --tunnel_background_save_delay=10s --tunnel_periodic_background_save_frequency=30m0s --enable_output_coalescing=true --output_coalescing_required=true'), PosixPath('--logtostderr --listen_host=172.28.0.12 --target_host=172.28.0.12 --tunnel_background_save_url=https')}\\n\",\n            \"  warn(msg)\\n\",\n            \"/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/env/python')}\\n\",\n            \"  warn(msg)\\n\",\n            \"/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('//ipykernel.pylab.backend_inline'), PosixPath('module')}\\n\",\n            \"  warn(msg)\\n\",\n            \"/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: Found duplicate ['libcudart.so', 
'libcudart.so.11.0', 'libcudart.so.12.0'] files: {PosixPath('/usr/local/cuda/lib64/libcudart.so'), PosixPath('/usr/local/cuda/lib64/libcudart.so.11.0')}.. We'll flip a coin and try one of these, in order to fail forward.\\n\",\n            \"Either way, this might cause trouble in the future:\\n\",\n            \"If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.\\n\",\n            \"  warn(msg)\\n\"\n          ]\n        },\n        {\n          \"output_type\": \"stream\",\n          \"name\": \"stdout\",\n          \"text\": [\n            \"CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching in backup paths...\\n\",\n            \"CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so\\n\",\n            \"CUDA SETUP: Highest compute capability among GPUs detected: 7.5\\n\",\n            \"CUDA SETUP: Detected CUDA version 118\\n\",\n            \"CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so...\\n\"\n          ]\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Loading checkpoint shards:   0%|          | 0/46 [00:00<?, ?it/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"fef6669cd33a4ad7b15704f6fdeacf22\"\n            }\n          },\n          \"metadata\": {}\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"Then we have to apply some preprocessing to the model to prepare it for training. 
For that use the `prepare_model_for_kbit_training` method from PEFT.\"\n      ],\n      \"metadata\": {\n        \"id\": \"Mp2gMi1ZzGET\"\n      }\n    },\n    {\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"from peft import prepare_model_for_kbit_training\\n\",\n        \"\\n\",\n        \"model.gradient_checkpointing_enable()\\n\",\n        \"model = prepare_model_for_kbit_training(model)\"\n      ],\n      \"metadata\": {\n        \"id\": \"a9EUEDAl0ss3\"\n      },\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"def print_trainable_parameters(model):\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"    Prints the number of trainable parameters in the model.\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"    trainable_params = 0\\n\",\n        \"    all_param = 0\\n\",\n        \"    for _, param in model.named_parameters():\\n\",\n        \"        all_param += param.numel()\\n\",\n        \"        if param.requires_grad:\\n\",\n        \"            trainable_params += param.numel()\\n\",\n        \"    print(\\n\",\n        \"        f\\\"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}\\\"\\n\",\n        \"    )\"\n      ],\n      \"metadata\": {\n        \"id\": \"gkIcwsSU01EB\"\n      },\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"from peft import LoraConfig, get_peft_model\\n\",\n        \"\\n\",\n        \"config = LoraConfig(\\n\",\n        \"    r=8, \\n\",\n        \"    lora_alpha=32, \\n\",\n        \"    target_modules=[\\\"query_key_value\\\"], \\n\",\n        \"    lora_dropout=0.05, \\n\",\n        \"    bias=\\\"none\\\", \\n\",\n        \"    task_type=\\\"CAUSAL_LM\\\"\\n\",\n        \")\\n\",\n        \"\\n\",\n        \"model = get_peft_model(model, config)\\n\",\n        
\"print_trainable_parameters(model)\"\n      ],\n      \"metadata\": {\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\"\n        },\n        \"id\": \"Ybeyl20n3dYH\",\n        \"outputId\": \"b2629933-0e7f-4f9f-e6fd-58c849d4475f\"\n      },\n      \"execution_count\": null,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"name\": \"stdout\",\n          \"text\": [\n            \"trainable params: 8650752 || all params: 10597552128 || trainable%: 0.08162971878329976\\n\"\n          ]\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"Let's load a common dataset, english quotes, to fine tune our model on famous quotes.\"\n      ],\n      \"metadata\": {\n        \"id\": \"FCc64bfnmd3j\"\n      }\n    },\n    {\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"from datasets import load_dataset\\n\",\n        \"\\n\",\n        \"data = load_dataset(\\\"Abirate/english_quotes\\\")\\n\",\n        \"data = data.map(lambda samples: tokenizer(samples[\\\"quote\\\"]), batched=True)\"\n      ],\n      \"metadata\": {\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 232,\n          \"referenced_widgets\": [\n            \"07882da20e7740bb9d2c8a3704d00c92\",\n            \"3ecf029b84ea46a9ac7b85aa0d2e07d2\",\n            \"80c43d494b74485d86f5a85b1a3c0101\",\n            \"e51a711db6ae462589fbde4f8c4d1032\",\n            \"62b4817a071c4d38a446cf2836407838\",\n            \"6f8a29d2f4034cffa5201853f3ba3726\",\n            \"be0e55802e604a9681bccba4db044103\",\n            \"25ccac5c9da94eabae3d517e04f5ac43\",\n            \"7cec16b3591d477b8d395944e8a70ebf\",\n            \"19df92341e3d413b84826f7cc6931061\",\n            \"b2d8b323c0984353a697716ed7a6d127\",\n            \"7e6769a945d649ec82ef7ed2c2e3357f\",\n            \"9db11bf2430f4493b01ce9e814ddb4a7\",\n            
\"cd250601aece4c1cb48a639b0f25d118\",\n            \"a2842acc3b8143b9b11b279052097f3a\",\n            \"8506ac0bb0204961afc7453d0c077680\",\n            \"5449f345b9584403b92ccaadcc009593\",\n            \"9c084c63a4e1476986a1389cb2a43eeb\",\n            \"d0167506afcc4602bd80a288bf191127\",\n            \"5ca20d88ea7a4b4dbe6fef01928ac183\",\n            \"1a24ab331b7a43fb8a8075c7f15b6541\",\n            \"59610889b58644979dcfb1440550d0df\",\n            \"73e1afe0180149f68988402ce03412c3\",\n            \"514d45ef359c44498c85dcfb58682111\",\n            \"03077ea71a23485aabe52788f8ebab78\",\n            \"0714173ebbb14d0d83782e0f0412567b\",\n            \"c2d371e15b374beda61ebc435981cd27\",\n            \"bf709f8bfc744e448d7367c8f43f1853\",\n            \"42e3d0c1fdb946469114ba0388e6b6ff\",\n            \"fbaadca189b448a3b17449b74136ffd9\",\n            \"421c682331134cf0b24b361b0245e15c\",\n            \"e9f39163fa344342a7c659a3cb782e46\",\n            \"fcaceddd0d8c4847ad8ce2490d126f6d\",\n            \"cf1bbb51baeb41918c4e35528337807f\",\n            \"80d90b93cc72422c908d1cd3dbf5c8f5\",\n            \"6182639b2982481d93ef75b50dab082a\",\n            \"baa95e82bca9445ebce3cf22280b950a\",\n            \"85b47afe6fbc4710b71f187781e08ec7\",\n            \"cade5b7ee3304ecdafdd17028e19c7cd\",\n            \"e790c00fe7a647c3973259d698c26c05\",\n            \"18c32d78b8cf4cedbb55b68073aef898\",\n            \"72a104f448284a14bb287afeced5bda9\",\n            \"1228a27445f8423580bf6a717f6629a6\",\n            \"9bde4e4848e64c4c94ab27f7f5cec308\",\n            \"8ebb69d66f354fc48694784a8f36ec63\",\n            \"8647a6d17b194650b404918e220a7ee0\",\n            \"379d5e2345e34c5889e531f0fc455741\",\n            \"92c4a7037df4432788661e13d5e668ec\",\n            \"bcac72f7ce134638b39f033c0f568fb3\",\n            \"18374440528a41ef9a8f11367320c171\",\n            \"80634feb58104ccb85ce4696c6a69e31\",\n            \"4723270e31f44e42b71c093eb9604258\",\n            
\"dd195f8647524e04bc9480f2fa85f5ce\",\n            \"2096ac97506f4fc2aa2a70bee61aea72\",\n            \"a7c6c15bc3ef471fb0d4e28adadbe41d\",\n            \"b2b6722f496a49918740dfc9a97789ab\",\n            \"ef5207843c4a4aa8b1e366a5d0ae7536\",\n            \"4344f1b57dd5424e9e5c046aa42cfe35\",\n            \"76f31d54e20b45269e2128340384374e\",\n            \"18732888c00f442bae580b94438e1916\",\n            \"f0d84153556e4bdbbb73c1dd80974d24\",\n            \"661c5bf64cff4f1893865982c68ca71c\",\n            \"4184603a7e74411a9bec05d6b5939694\",\n            \"477d9f12c1964aeba9258b7990f51aa8\",\n            \"63cb22480eae40c7a91796718fe78fb3\",\n            \"90feb97f2c20432eb2f54485ea8d3087\",\n            \"6ec3c77d01994f0a97bcd83c542ea8a9\",\n            \"2c8f8046c269429a894a1910cfdd5921\",\n            \"55608d21c20641a5a9798e721c84290a\",\n            \"0c14280af7d04e23bc0bd0a83b4af90e\",\n            \"b767cbf30ee6433ba8c006ded500df2b\",\n            \"6a40b6ae495c4ef882611b4a066a5ae2\",\n            \"5924fee94134449ea78570bce67d3aef\",\n            \"8af4e90d1af944c98e6d95f75fbe6c11\",\n            \"22b3781abd96465c8ae8740432a95732\",\n            \"ecfb53b9fa4b43eab3c815ee9a90d3c7\",\n            \"144b47000c76414fb7051640e6d0bb33\"\n          ]\n        },\n        \"id\": \"s6f4z8EYmcJ6\",\n        \"outputId\": \"b12f8293-da00-4514-d246-27fa525b169d\"\n      },\n      \"execution_count\": null,\n      \"outputs\": [\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading readme:   0%|          | 0.00/5.55k [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"07882da20e7740bb9d2c8a3704d00c92\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"stream\",\n          
\"name\": \"stdout\",\n          \"text\": [\n            \"Downloading and preparing dataset json/Abirate--english_quotes to /root/.cache/huggingface/datasets/Abirate___json/Abirate--english_quotes-6e72855d06356857/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4...\\n\"\n          ]\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"7e6769a945d649ec82ef7ed2c2e3357f\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Downloading data:   0%|          | 0.00/647k [00:00<?, ?B/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"73e1afe0180149f68988402ce03412c3\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"cf1bbb51baeb41918c4e35528337807f\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Generating train split: 0 examples [00:00, ? 
examples/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"8ebb69d66f354fc48694784a8f36ec63\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"stream\",\n          \"name\": \"stdout\",\n          \"text\": [\n            \"Dataset json downloaded and prepared to /root/.cache/huggingface/datasets/Abirate___json/Abirate--english_quotes-6e72855d06356857/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4. Subsequent calls will reuse this data.\\n\"\n          ]\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"  0%|          | 0/1 [00:00<?, ?it/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"b2b6722f496a49918740dfc9a97789ab\"\n            }\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"Map:   0%|          | 0/2508 [00:00<?, ? examples/s]\"\n            ],\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"version_major\": 2,\n              \"version_minor\": 0,\n              \"model_id\": \"6ec3c77d01994f0a97bcd83c542ea8a9\"\n            }\n          },\n          \"metadata\": {}\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"Run the cell below to run the training! 
For the sake of the demo, we just ran it for few steps just to showcase how to use this integration with existing tools on the HF ecosystem.\"\n      ],\n      \"metadata\": {\n        \"id\": \"_0MOtwf3zdZp\"\n      }\n    },\n    {\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"import transformers\\n\",\n        \"\\n\",\n        \"# needed for gpt-neo-x tokenizer\\n\",\n        \"tokenizer.pad_token = tokenizer.eos_token\\n\",\n        \"\\n\",\n        \"trainer = transformers.Trainer(\\n\",\n        \"    model=model,\\n\",\n        \"    train_dataset=data[\\\"train\\\"],\\n\",\n        \"    args=transformers.TrainingArguments(\\n\",\n        \"        per_device_train_batch_size=1,\\n\",\n        \"        gradient_accumulation_steps=4,\\n\",\n        \"        warmup_steps=2,\\n\",\n        \"        max_steps=10,\\n\",\n        \"        learning_rate=2e-4,\\n\",\n        \"        fp16=True,\\n\",\n        \"        logging_steps=1,\\n\",\n        \"        output_dir=\\\"outputs\\\",\\n\",\n        \"        optim=\\\"paged_adamw_8bit\\\"\\n\",\n        \"    ),\\n\",\n        \"    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\\n\",\n        \")\\n\",\n        \"model.config.use_cache = False  # silence the warnings. Please re-enable for inference!\\n\",\n        \"trainer.train()\"\n      ],\n      \"metadata\": {\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 495\n        },\n        \"id\": \"jq0nX33BmfaC\",\n        \"outputId\": \"b849f09a-5e9e-4492-8231-cca3d42da22e\"\n      },\n      \"execution_count\": null,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"name\": \"stderr\",\n          \"text\": [\n            \"You're using a GPTNeoXTokenizerFast tokenizer. 
Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\\n\",\n            \"/usr/local/lib/python3.10/dist-packages/transformers/models/gpt_neox/modeling_gpt_neox.py:229: UserWarning: where received a uint8 condition tensor. This behavior is deprecated and will be removed in a future version of PyTorch. Use a boolean condition instead. (Triggered internally at ../aten/src/ATen/native/TensorCompare.cpp:493.)\\n\",\n            \"  attn_scores = torch.where(causal_mask, attn_scores, mask_value)\\n\"\n          ]\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"text/plain\": [\n              \"<IPython.core.display.HTML object>\"\n            ],\n            \"text/html\": [\n              \"\\n\",\n              \"    <div>\\n\",\n              \"      \\n\",\n              \"      <progress value='10' max='10' style='width:300px; height:20px; vertical-align: middle;'></progress>\\n\",\n              \"      [10/10 02:27, Epoch 0/1]\\n\",\n              \"    </div>\\n\",\n              \"    <table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n              \"  <thead>\\n\",\n              \" <tr style=\\\"text-align: left;\\\">\\n\",\n              \"      <th>Step</th>\\n\",\n              \"      <th>Training Loss</th>\\n\",\n              \"    </tr>\\n\",\n              \"  </thead>\\n\",\n              \"  <tbody>\\n\",\n              \"    <tr>\\n\",\n              \"      <td>1</td>\\n\",\n              \"      <td>2.373500</td>\\n\",\n              \"    </tr>\\n\",\n              \"    <tr>\\n\",\n              \"      <td>2</td>\\n\",\n              \"      <td>3.283200</td>\\n\",\n              \"    </tr>\\n\",\n              \"    <tr>\\n\",\n              \"      <td>3</td>\\n\",\n              \"      <td>2.290500</td>\\n\",\n              \"    </tr>\\n\",\n        
      \"    <tr>\\n\",\n              \"      <td>4</td>\\n\",\n              \"      <td>2.834700</td>\\n\",\n              \"    </tr>\\n\",\n              \"    <tr>\\n\",\n              \"      <td>5</td>\\n\",\n              \"      <td>2.635500</td>\\n\",\n              \"    </tr>\\n\",\n              \"    <tr>\\n\",\n              \"      <td>6</td>\\n\",\n              \"      <td>2.185200</td>\\n\",\n              \"    </tr>\\n\",\n              \"    <tr>\\n\",\n              \"      <td>7</td>\\n\",\n              \"      <td>2.260900</td>\\n\",\n              \"    </tr>\\n\",\n              \"    <tr>\\n\",\n              \"      <td>8</td>\\n\",\n              \"      <td>1.506300</td>\\n\",\n              \"    </tr>\\n\",\n              \"    <tr>\\n\",\n              \"      <td>9</td>\\n\",\n              \"      <td>2.470600</td>\\n\",\n              \"    </tr>\\n\",\n              \"    <tr>\\n\",\n              \"      <td>10</td>\\n\",\n              \"      <td>2.498200</td>\\n\",\n              \"    </tr>\\n\",\n              \"  </tbody>\\n\",\n              \"</table><p>\"\n            ]\n          },\n          \"metadata\": {}\n        },\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"TrainOutput(global_step=10, training_loss=2.4338608503341677, metrics={'train_runtime': 166.0171, 'train_samples_per_second': 0.241, 'train_steps_per_second': 0.06, 'total_flos': 99255709532160.0, 'train_loss': 2.4338608503341677, 'epoch': 0.02})\"\n            ]\n          },\n          \"metadata\": {},\n          \"execution_count\": 7\n        }\n      ]\n    }\n  ]\n}"
  },
  {
    "path": "config.py",
    "content": "import texar as tx\ndcoder_config = {\n    'dim': 768,\n    'num_blocks': 6,\n    'multihead_attention': {\n        'num_heads': 8,\n        'output_dim': 768\n        # See documentation for more optional hyperparameters\n    },\n    'position_embedder_hparams': {\n        'dim': 768\n    },\n    'initializer': {\n        'type': 'variance_scaling_initializer',\n        'kwargs': {\n            'scale': 1.0,\n            'mode': 'fan_avg',\n            'distribution': 'uniform',\n        },\n    },\n    'poswise_feedforward': tx.modules.default_transformer_poswise_net_hparams(\n        output_dim=768)\n}\n\nloss_label_confidence = 0.9\n\nrandom_seed = 1234\nbeam_width = 5\nalpha = 0.6\nhidden_dim = 768\n\n\nopt = {\n    'optimizer': {\n        'type': 'AdamOptimizer',\n        'kwargs': {\n            'beta1': 0.9,\n            'beta2': 0.997,\n            'epsilon': 1e-9\n        }\n    }\n}\n\n\n#warmup steps must be 0.1% of number of iterations\nlr = {\n    'learning_rate_schedule': 'constant.linear_warmup.rsqrt_decay.rsqrt_depth',\n    'lr_constant': 2 * (hidden_dim ** -0.5),\n    'static_lr': 1e-3,\n    'warmup_steps': 10000,\n}\n\nbos_token_id =101\neos_token_id = 102\n\nmodel_dir= \"./models\"\nrun_mode= \"train_and_evaluate\"\nbatch_size = 1\neval_batch_size = 1\n\nmax_train_steps = 100000\n\ndisplay_steps = 1\ncheckpoint_steps = 1000\neval_steps = 50000\n\nmax_decoding_length = 400\n\nmax_seq_length_src = 512\nmax_seq_length_tgt = 400\n\nepochs =10\n\nis_distributed = False\n\n\ndata_dir = \"data/\"\n\ntrain_out_file = \"data/train.tf_record\"\neval_out_file = \"data/eval.tf_record\"\n\nbert_pretrain_dir=\"./bert_uncased_model\"\n\ntrain_story = \"data/train_story.txt\"\ntrain_summ = \"data/train_summ.txt\"\n\neval_story = \"data/eval_story.txt\"\neval_summ = \"data/eval_summ.txt\"\n\nbert_pretrain_dir = \"./uncased_L-12_H-768_A-12\"\n\n"
  },
  {
    "path": "data/eval_story.txt",
    "content": "The new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. 
This is kind of like hyperparameter in KNN or regressions."
  },
  {
    "path": "data/eval_summ.txt",
    "content": "The new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. 
This is kind of like hyperparameter in KNN or regressions."
  },
  {
    "path": "data/train_story.txt",
    "content": "The new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. 
This is kind of like hyperparameter in KNN or regressions."
  },
  {
    "path": "data/train_summ.txt",
    "content": "The new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. This is kind of like hyperparameter in KNN or regressions.\nThe new question is that we know how many class data includes, but what if number of class is unknow in data. 
This is kind of like hyperparameter in KNN or regressions."
  },
  {
    "path": "main.py",
    "content": "import sys\n\nif not 'texar_repo' in sys.path:\n  sys.path += ['texar_repo']\nimport tensorflow as tf\nimport texar as tx\nimport numpy as np\nfrom config import *\nfrom model import *\n\nimport os\n\n\ndef _train_epoch(sess, epoch, step, smry_writer):\n        \n            \n        fetches = {\n            'step': global_step,\n            'train_op': train_op,\n            'smry': summary_merged,\n            'loss': mle_loss,\n        }\n\n        while True:\n            try:\n              feed_dict = {\n                iterator.handle: iterator.get_handle(sess, 'train'),\n                tx.global_mode(): tf.estimator.ModeKeys.TRAIN,\n              }\n              op = sess.run([batch],feed_dict)\n              feed_dict = {\n                   src_input_ids:op[0]['src_input_ids'],\n                   src_segment_ids : op[0]['src_segment_ids'],\n                   tgt_input_ids:op[0]['tgt_input_ids'],\n\n                   labels:op[0]['tgt_labels'],\n                   learning_rate: utils.get_lr(step, lr),\n                   tx.global_mode(): tf.estimator.ModeKeys.TRAIN\n                }\n\n\n              fetches_ = sess.run(fetches, feed_dict=feed_dict)\n              step, loss = fetches_['step'], fetches_['loss']\n              if step and step % display_steps == 0:\n                  logger.info('step: %d, loss: %.4f', step, loss)\n                  print('step: %d, loss: %.4f' % (step, loss))\n                  smry_writer.add_summary(fetches_['smry'], global_step=step)\n\n              if step and step % checkpoint_steps == 0:\n                  model_path = model_dir+\"/model_\"+str(step)+\".ckpt\"\n                  logger.info('saving model to %s', model_path)\n                  print('saving model to %s' % model_path)\n                  saver.save(sess, model_path)\n              if step and step % eval_steps == 0:\n                  _eval_epoch(sess, epoch, mode='eval')\n            except tf.errors.OutOfRangeError:\n        
        break\n\n        return step\n\ndef _eval_epoch(sess, epoch, mode):\n\n        references, hypotheses = [], []\n        bsize = test_batch_size\n        fetches = {\n                'inferred_ids': inferred_ids,\n            }\n        bno=0\n        while True:\n            \n            #print(\"Temp\",temp)\n            try:\n              print(\"Batch\",bno)\n              feed_dict = {\n              iterator.handle: iterator.get_handle(sess, 'eval'),\n              tx.global_mode(): tf.estimator.ModeKeys.EVAL,\n              }\n              op = sess.run([batch],feed_dict)\n              feed_dict = {\n                   src_input_ids:op[0]['src_input_ids'],\n                   src_segment_ids : op[0]['src_segment_ids'],\n                   tx.global_mode(): tf.estimator.ModeKeys.EVAL\n              }\n              fetches_ = sess.run(fetches, feed_dict=feed_dict)\n              labels = op[0]['tgt_labels']\n              hypotheses.extend(h.tolist() for h in fetches_['inferred_ids'])\n              references.extend(r.tolist() for r in labels)\n              hypotheses = utils.list_strip_eos(hypotheses, eos_token_id)\n              references = utils.list_strip_eos(references, eos_token_id)\n              bno = bno+1\n              \n            except tf.errors.OutOfRangeError:\n                break\n\n\n        if mode == 'eval':\n            # Writes results to files to evaluate BLEU\n            # For 'eval' mode, the BLEU is based on token ids (rather than\n            # text tokens) and serves only as a surrogate metric to monitor\n            # the training process\n            fname = os.path.join(model_dir, 'tmp.eval')\n            \n            hypotheses = tx.utils.str_join(hypotheses)\n            references = tx.utils.str_join(references)\n            hyp_fn, ref_fn = tx.utils.write_paired_text(\n                hypotheses, references, fname, mode='s')\n            eval_bleu = bleu_wrapper(ref_fn, hyp_fn, case_sensitive=True)\n       
     eval_bleu = 100. * eval_bleu\n            logger.info('epoch: %d, eval_bleu %.4f', epoch, eval_bleu)\n            print('epoch: %d, eval_bleu %.4f' % (epoch, eval_bleu))\n\n            if eval_bleu > best_results['score']:\n                logger.info('epoch: %d, best bleu: %.4f', epoch, eval_bleu)\n                best_results['score'] = eval_bleu\n                best_results['epoch'] = epoch\n                model_path = os.path.join(model_dir, 'best-model.ckpt')\n               \n                logger.info('saving model to %s', model_path)\n                print('saving model to %s' % model_path)\n                saver.save(sess, model_path)\n\n\ntx.utils.maybe_create_dir(model_dir)\nlogging_file= os.path.join(model_dir,\"logging.txt\")\nlogger = utils.get_logger(logging_file)\nwith tf.Session() as sess:\n    sess.run(tf.global_variables_initializer())\n    sess.run(tf.local_variables_initializer())\n    sess.run(tf.tables_initializer())\n\n    smry_writer = tf.summary.FileWriter(model_dir, graph=sess.graph)\n\n    if run_mode == 'train_and_evaluate':\n        logger.info('Begin running with train_and_evaluate mode')\n\n        if tf.train.latest_checkpoint(model_dir) is not None:\n            logger.info('Restore latest checkpoint in %s' % model_dir)\n            saver.restore(sess, tf.train.latest_checkpoint(model_dir))\n        \n        iterator.initialize_dataset(sess)\n\n        step = 0\n        for epoch in range(epochs):\n          iterator.restart_dataset(sess, 'train')\n          step = _train_epoch(sess, epoch, step, smry_writer)\n\n    \n\n    else:\n        raise ValueError('Unknown mode: {}'.format(run_mode))\n"
  },
  {
    "path": "model.py",
    "content": "\nimport sys\nif not 'texar_repo' in sys.path:\n  sys.path += ['texar_repo']\n\nfrom config import *\nfrom preprocess import file_based_input_fn_builder\nimport os\nimport csv\nimport collections\nfrom texar_repo.examples.bert.utils import data_utils, model_utils, tokenization\nimport importlib\nimport tensorflow as tf\nimport texar as tx \nfrom texar_repo.examples.bert import config_classifier as config_downstream\nfrom texar_repo.texar.utils import transformer_utils\nfrom texar_repo.examples.transformer.utils import data_utils, utils\nfrom texar_repo.examples.transformer.bleu_tool import bleu_wrapper\n\ntrain_dataset = file_based_input_fn_builder(\n            input_file=train_out_file,\n            max_seq_length_src=max_seq_length_src,\n            max_seq_length_tgt =max_seq_length_tgt,\n            is_training=True,\n            drop_remainder=True,\n            is_distributed=is_distributed)({'batch_size': batch_size})\n\neval_dataset = file_based_input_fn_builder(\n            input_file=eval_out_file,\n            max_seq_length_src=max_seq_length_src,\n            max_seq_length_tgt =max_seq_length_tgt,\n            is_training=True,\n            drop_remainder=True,\n            is_distributed=is_distributed)({'batch_size': eval_batch_size})\n\n\nbert_config = model_utils.transform_bert_to_texar_config(\n            os.path.join(bert_pretrain_dir, 'bert_config.json'))\n\n\n\ntokenizer = tokenization.FullTokenizer(\n        vocab_file=os.path.join(bert_pretrain_dir, 'vocab.txt'),\n        do_lower_case=True)\n\nvocab_size = len(tokenizer.vocab)\n\nsrc_input_ids = tf.placeholder(tf.int64, shape=(None, None))\nsrc_segment_ids = tf.placeholder(tf.int64, shape=(None, None))\ntgt_input_ids = tf.placeholder(tf.int64, shape=(None, None))\ntgt_segment_ids = tf.placeholder(tf.int64, shape=(None, None))\n\nbatch_size = tf.shape(src_input_ids)[0]\n\nsrc_input_length = tf.reduce_sum(1 - tf.to_int32(tf.equal(src_input_ids, 0)),\n                     
axis=1)\ntgt_input_length = tf.reduce_sum(1 - tf.to_int32(tf.equal(tgt_input_ids, 0)),\n                     axis=1)\n\nlabels = tf.placeholder(tf.int64, shape=(None, None))\nis_target = tf.to_float(tf.not_equal(labels, 0))\n\n\nglobal_step = tf.Variable(0, dtype=tf.int64, trainable=False)\n\nlearning_rate = tf.placeholder(tf.float64, shape=(), name='lr')\n\niterator = tx.data.FeedableDataIterator({\n        'train': train_dataset, 'eval': eval_dataset})\n\nbatch = iterator.get_next()\n\n#encoder Bert model\nprint(\"Intializing the Bert Encoder Graph\")\nwith tf.variable_scope('bert'):\n        embedder = tx.modules.WordEmbedder(\n            vocab_size=bert_config.vocab_size,\n            hparams=bert_config.embed)\n        word_embeds = embedder(src_input_ids)\n\n        # Creates segment embeddings for each type of tokens.\n        segment_embedder = tx.modules.WordEmbedder(\n            vocab_size=bert_config.type_vocab_size,\n            hparams=bert_config.segment_embed)\n        segment_embeds = segment_embedder(src_segment_ids)\n\n        input_embeds = word_embeds + segment_embeds\n\n        # The BERT model (a TransformerEncoder)\n        encoder = tx.modules.TransformerEncoder(hparams=bert_config.encoder)\n        encoder_output = encoder(input_embeds, src_input_length)\n        \n        # Builds layers for downstream classification, which is also initialized\n        # with BERT pre-trained checkpoint.\n        with tf.variable_scope(\"pooler\"):\n            # Uses the projection of the 1st-step hidden vector of BERT output\n            # as the representation of the sentence\n            bert_sent_hidden = tf.squeeze(encoder_output[:, 0:1, :], axis=1)\n            bert_sent_output = tf.layers.dense(\n                bert_sent_hidden, config_downstream.hidden_dim,\n                activation=tf.tanh)\n            output = tf.layers.dropout(\n                bert_sent_output, rate=0.1, training=tx.global_mode_train())\n\n\nprint(\"loading the bert 
pretrained weights\")\n# Loads pretrained BERT model parameters\ninit_checkpoint = os.path.join(bert_pretrain_dir, 'bert_model.ckpt')\nmodel_utils.init_bert_checkpoint(init_checkpoint)\n\ntgt_embedding = tf.concat(\n    [tf.zeros(shape=[1, embedder.dim]), embedder.embedding[1:, :]], axis=0)\n\ndecoder = tx.modules.TransformerDecoder(embedding=tgt_embedding,\n                             hparams=dcoder_config)\n# For training\noutputs = decoder(\n    memory=encoder_output,\n    memory_sequence_length=src_input_length,\n    inputs=embedder(tgt_input_ids),\n    sequence_length=tgt_input_length,\n    decoding_strategy='train_greedy',\n    mode=tf.estimator.ModeKeys.TRAIN\n)\n\nmle_loss = transformer_utils.smoothing_cross_entropy(\n        outputs.logits, labels, vocab_size, loss_label_confidence)\nmle_loss = tf.reduce_sum(mle_loss * is_target) / tf.reduce_sum(is_target)\n\ntvars =tf.trainable_variables()\n\nnon_bert_vars = [var for var in tvars if 'bert' not in var.name]\n\n\n\n\n\ntrain_op = tx.core.get_train_op(\n        mle_loss,\n        learning_rate=learning_rate,\n        variables= non_bert_vars,\n        global_step=global_step,\n        hparams=opt)\n\ntf.summary.scalar('lr', learning_rate)\ntf.summary.scalar('mle_loss', mle_loss)\nsummary_merged = tf.summary.merge_all()\n\nsaver = tf.train.Saver(max_to_keep=5)\nbest_results = {'score': 0, 'epoch': -1}\n\nstart_tokens = tf.fill([tx.utils.get_batch_size(src_input_ids)],\n                       bos_token_id)\npredictions = decoder(\n    memory=encoder_output,\n    memory_sequence_length=src_input_length,\n    decoding_strategy='infer_greedy',\n    beam_width=beam_width,\n    alpha=alpha,\n    start_tokens=start_tokens,\n    end_token=eos_token_id,\n    max_decoding_length=400,\n    mode=tf.estimator.ModeKeys.PREDICT\n)\nif beam_width <= 1:\n    inferred_ids = predictions[0].sample_id\nelse:\n    # Uses the best sample by beam search\n    inferred_ids = predictions['sample_id'][:, :, 0]\n\n\n\n"
  },
  {
    "path": "models/logging.txt",
    "content": "2019-03-08 20:02:04,048:INFO:Begin running with train_and_evaluate mode\n2019-03-08 20:03:50,512:INFO:Begin running with train_and_evaluate mode\n2019-03-08 20:05:00,060:INFO:Begin running with train_and_evaluate mode\n2019-03-08 20:08:14,915:INFO:Begin running with train_and_evaluate mode\n2019-03-08 20:12:42,894:INFO:Begin running with train_and_evaluate mode\n2019-03-08 20:22:29,211:INFO:Begin running with train_and_evaluate mode\n2019-03-08 20:22:39,003:INFO:step: 1, loss: 11.7971\n2019-03-08 20:22:42,072:INFO:step: 2, loss: 11.7444\n2019-03-08 20:22:45,111:INFO:step: 3, loss: 11.6753\n2019-03-08 20:22:48,523:INFO:step: 4, loss: 11.8856\n2019-03-08 20:22:51,878:INFO:step: 5, loss: 11.7765\n2019-03-08 20:22:55,144:INFO:step: 6, loss: 11.9311\n2019-03-08 20:22:58,406:INFO:step: 7, loss: 11.8430\n2019-03-08 20:23:01,664:INFO:step: 8, loss: 11.7669\n2019-03-08 20:23:04,947:INFO:step: 9, loss: 11.7373\n2019-03-08 20:23:08,286:INFO:step: 10, loss: 11.9579\n2019-03-08 20:23:11,635:INFO:step: 11, loss: 11.5600\n2019-03-08 20:23:15,028:INFO:step: 12, loss: 11.6753\n2019-03-08 20:23:18,427:INFO:step: 13, loss: 11.5919\n2019-03-08 20:23:21,835:INFO:step: 14, loss: 11.5611\n2019-03-08 20:23:25,236:INFO:step: 15, loss: 11.3855\n2019-03-08 20:23:28,616:INFO:step: 16, loss: 11.3497\n2019-03-08 20:23:31,978:INFO:step: 17, loss: 11.3501\n2019-03-08 20:23:35,314:INFO:step: 18, loss: 11.5671\n2019-03-08 20:23:38,646:INFO:step: 19, loss: 11.3275\n2019-03-08 20:23:41,964:INFO:step: 20, loss: 11.1347\n2019-03-08 20:23:45,385:INFO:step: 21, loss: 11.2892\n2019-03-08 20:23:48,854:INFO:step: 22, loss: 10.9162\n2019-03-08 20:23:52,347:INFO:step: 23, loss: 11.0379\n2019-03-08 20:23:55,775:INFO:step: 24, loss: 11.0149\n2019-03-08 20:23:59,314:INFO:step: 25, loss: 10.7168\n2019-03-08 20:24:02,892:INFO:step: 26, loss: 10.9317\n2019-03-08 20:24:06,576:INFO:step: 27, loss: 10.8448\n2019-03-08 20:24:10,262:INFO:step: 28, loss: 10.7415\n2019-03-08 20:24:13,979:INFO:step: 29, 
loss: 10.8425\n2019-03-08 20:24:17,663:INFO:step: 30, loss: 10.7316\n2019-03-08 20:24:21,293:INFO:step: 31, loss: 10.6841\n2019-03-08 20:24:24,977:INFO:step: 32, loss: 10.4804\n2019-03-08 20:24:28,662:INFO:step: 33, loss: 10.2873\n"
  },
  {
    "path": "preprocess.py",
    "content": "\nimport sys\nif not 'texar_repo' in sys.path:\n  sys.path += ['texar_repo']\nfrom config import *\nfrom texar_repo.examples.bert.utils import data_utils, model_utils, tokenization\nfrom texar_repo.examples.transformer.utils import data_utils, utils\nimport tensorflow as tf\nimport os\nimport csv\nimport collections\n\nclass InputExample():\n    \"\"\"A single training/test example for simple sequence classification.\"\"\"\n\n    def __init__(self, guid, text_a, text_b=None):\n        \"\"\"Constructs a InputExample.\n        Args:\n            guid: Unique id for the example.\n            text_a: string. The untokenized text of the first sequence.\n                For single sequence tasks, only this sequence must be specified.\n            text_b: (Optional) string. The untokenized text of the second\n                sequence. Only must be specified for sequence pair tasks.\n            label: (Optional) string. The label of the example. This should be\n                specified for train and dev examples, but not for test examples.\n        \"\"\"\n        self.guid = guid\n        self.src_txt = text_a\n        self.tgt_txt = text_b\n        \nclass InputFeatures():\n    \"\"\"A single set of features of data.\"\"\"\n\n    def __init__(self, src_input_ids,src_input_mask,src_segment_ids,tgt_input_ids,tgt_input_mask,tgt_labels):\n        self.src_input_ids = src_input_ids\n        self.src_input_mask = src_input_mask\n        self.src_segment_ids = src_segment_ids\n        self.tgt_input_ids = tgt_input_ids\n        self.tgt_input_mask = tgt_input_mask \n        self.tgt_labels = tgt_labels\n        \n       \nclass DataProcessor(object):\n    \"\"\"Base class for data converters for sequence classification data sets.\"\"\"\n\n    def get_train_examples(self, data_dir):\n        \"\"\"Gets a collection of `InputExample`s for the train set.\"\"\"\n        raise NotImplementedError()\n\n    def get_dev_examples(self, data_dir):\n        \"\"\"Gets a 
collection of `InputExample`s for the dev set.\"\"\"\n        raise NotImplementedError()\n\n    def get_test_examples(self, data_dir):\n        \"\"\"Gets a collection of `InputExample`s for prediction.\"\"\"\n        raise NotImplementedError()\n\n    def get_labels(self):\n        \"\"\"Gets the list of labels for this data set.\"\"\"\n        raise NotImplementedError()\n\n    @classmethod\n    def _read_tsv(cls, input_file, quotechar=None):\n        \"\"\"Reads a tab separated value file.\"\"\"\n        with tf.gfile.Open(input_file, \"r\") as f:\n            reader = csv.reader(f, delimiter=\"\\t\", quotechar=quotechar)\n            lines = []\n            i = 0\n            for line in reader:\n                lines.append(line)\n        return lines\n\n\n    @classmethod\n    def _read_file(cls, input_file, quotechar=None):\n        \"\"\"Reads a tab separated value file.\"\"\"\n        with tf.gfile.Open(input_file, \"r\") as f:\n            reader = csv.reader(f, delimiter=\"\\n\", quotechar=quotechar)\n            lines = []\n            i = 0\n            for line in reader:\n                lines.append(line)\n        return lines\n      \n      \nclass CNNDailymail(DataProcessor):\n    \"\"\"Processor for the CoLA data set (GLUE version).\"\"\"\n\n    def get_train_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_file(os.path.join(data_dir, \"train_story.txt\")),self._read_file(os.path.join(data_dir, \"train_summ.txt\")),\n            \"train\")\n\n    def get_dev_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_file(os.path.join(data_dir, \"eval_story.txt\")),self._read_file(os.path.join(data_dir, \"eval_summ.txt\")),\n            \"dev\")\n\n    def get_test_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            
self._read_file(os.path.join(data_dir, \"test_story.txt\")),self._read_file(os.path.join(data_dir, \"test_summ.txt\")),\n            \"test\")\n\n    def _create_examples(self, src_lines,tgt_lines,set_type):\n        examples = [] \n        for i,data in enumerate(zip(src_lines,tgt_lines)):\n            guid = \"%s-%s\" % (set_type, i)\n            if set_type == \"test\" and i == 0:\n                continue\n            else:\n                #print(data)\n                if len(data[0])==0 or len(data[1])==0:\n                  continue\n                src_lines = tokenization.convert_to_unicode(data[0][0])\n                tgt_lines = tokenization.convert_to_unicode(data[1][0])\n                examples.append(InputExample(guid=guid, text_a=src_lines,\n                                         text_b=tgt_lines))\n        return examples\n  \n  \ndef file_based_convert_examples_to_features(\n        examples, max_seq_length_src,max_seq_length_tgt,tokenizer, output_file):\n    \"\"\"Convert a set of `InputExample`s to a TFRecord file.\"\"\"\n\n    writer = tf.python_io.TFRecordWriter(output_file)\n\n    for (ex_index, example) in enumerate(examples):\n        #print(\"ex_index\",ex_index)\n\n        if (ex_index+1) %1000 == 0 :\n          print(\"------------processed..{}...examples\".format(ex_index))\n          \n        feature = convert_single_example(ex_index, example,\n                                         max_seq_length_src,max_seq_length_tgt,tokenizer)\n\n        def create_int_feature(values):\n            return tf.train.Feature(\n                int64_list=tf.train.Int64List(value=list(values)))\n\n        features = collections.OrderedDict()\n        features[\"src_input_ids\"] = create_int_feature(feature.src_input_ids)\n        features[\"src_input_mask\"] = create_int_feature(feature.src_input_mask)\n        features[\"src_segment_ids\"] = create_int_feature(feature.src_segment_ids)\n\n        features[\"tgt_input_ids\"] = 
create_int_feature(feature.tgt_input_ids)\n        features[\"tgt_input_mask\"] = create_int_feature(feature.tgt_input_mask)\n        features['tgt_labels'] = create_int_feature(feature.tgt_labels)\n        \n        \n        \n        #print(feature.tgt_labels)\n        \n\n        tf_example = tf.train.Example(\n            features=tf.train.Features(feature=features))\n        writer.write(tf_example.SerializeToString())\n\n\ndef convert_single_example(ex_index, example, max_seq_length_src,max_seq_length_tgt,\n                           tokenizer):\n    \"\"\"Converts a single `InputExample` into a single `InputFeatures`.\"\"\"\n    \"\"\"\n    label_map = {}\n    for (i, label) in enumerate(label_list):\n        label_map[label] = i\n    \"\"\"\n    tokens_a = tokenizer.tokenize(example.src_txt)\n    tokens_b = tokenizer.tokenize(example.tgt_txt)\n\n\n    # Modifies `tokens_a` and `tokens_b` in place so that the total\n    # length is less than the specified length.\n    # Account for [CLS], [SEP], [SEP] with \"- 3\"\n    if len(tokens_a) > max_seq_length_src - 2:\n            tokens_a = tokens_a[0:(max_seq_length_src - 2)]\n    \n    if len(tokens_b) > max_seq_length_tgt - 2:\n            tokens_b = tokens_b[0:(max_seq_length_tgt - 2)]\n\n    \n    tokens_src = []\n    segment_ids_src = []\n    tokens_src.append(\"[CLS]\")\n    segment_ids_src.append(0)\n    for token in tokens_a:\n        tokens_src.append(token)\n        segment_ids_src.append(0)\n    tokens_src.append(\"[SEP]\")\n    segment_ids_src.append(0)\n  \n\n    tokens_tgt = []\n    segment_ids_tgt = []\n    tokens_tgt.append(\"[CLS]\")\n    #segment_ids_tgt.append(0)\n    for token in tokens_b:\n        tokens_tgt.append(token)\n        #segment_ids_tgt.append(0)\n    tokens_tgt.append(\"[SEP]\")\n    #segment_ids_tgt.append(0)\n\n    input_ids_src = tokenizer.convert_tokens_to_ids(tokens_src)\n   \n    \n\n    input_ids_tgt = tokenizer.convert_tokens_to_ids(tokens_tgt)\n\n    labels_tgt = 
input_ids_tgt[1:]\n    \n    #Adding begiining and end token\n    input_ids_tgt = input_ids_tgt[:-1] \n    \n    input_mask_src = [1] * len(input_ids_src)\n\n\n    input_mask_tgt = [1] * len(input_ids_tgt)\n    \n    \n    \n    #print(len(input_ids_tgt))\n    #print(len(input_mask_tgt))\n    #print(len(labels_tgt))\n    #print(len(segment_ids_tgt))\n    \n    while len(input_ids_src) < max_seq_length_src:\n        input_ids_src.append(0)\n        input_mask_src.append(0)\n        segment_ids_src.append(0)\n\n    while len(input_ids_tgt) < max_seq_length_tgt:\n        input_ids_tgt.append(0)\n        input_mask_tgt.append(0)\n        segment_ids_tgt.append(0)\n        labels_tgt.append(0)\n\n    feature = InputFeatures( src_input_ids=input_ids_src,src_input_mask=input_mask_src,src_segment_ids=segment_ids_src,\n        tgt_input_ids=input_ids_tgt,tgt_input_mask=input_mask_tgt,tgt_labels=labels_tgt)\n\n    \n    return feature\n\n\ndef file_based_input_fn_builder(input_file, max_seq_length_src,max_seq_length_tgt, is_training,\n                                drop_remainder, is_distributed=False):\n    \"\"\"Creates an `input_fn` closure to be passed to TPUEstimator.\"\"\"\n\n    name_to_features = {\n        \"src_input_ids\": tf.FixedLenFeature([max_seq_length_src], tf.int64),\n        \"src_input_mask\": tf.FixedLenFeature([max_seq_length_src], tf.int64),\n        \"src_segment_ids\": tf.FixedLenFeature([max_seq_length_src], tf.int64),\n        \"tgt_input_ids\": tf.FixedLenFeature([max_seq_length_tgt], tf.int64),\n        \"tgt_input_mask\": tf.FixedLenFeature([max_seq_length_tgt], tf.int64),\n        \"tgt_labels\" : tf.FixedLenFeature([max_seq_length_tgt], tf.int64),\n        \n        \n    }\n\n    def _decode_record(record, name_to_features):\n        \"\"\"Decodes a record to a TensorFlow example.\"\"\"\n        example = tf.parse_single_example(record, name_to_features)\n        print(example)\n        print(example.keys())\n\n        # tf.Example only 
supports tf.int64, but the TPU only supports tf.int32.\n        # So cast all int64 to int32.\n        for name in list(example.keys()):\n            t = example[name]\n            if t.dtype == tf.int64:\n                t = tf.to_int32(t)\n            example[name] = t\n\n        return example\n\n    def input_fn(params):\n        \"\"\"The actual input function.\"\"\"\n        batch_size = params[\"batch_size\"]\n\n        # For training, we want a lot of parallel reading and shuffling.\n        # For eval, we want no shuffling and parallel reading doesn't matter.\n        d = tf.data.TFRecordDataset(input_file)\n        if is_training:\n\n            if is_distributed:\n                import horovod.tensorflow as hvd\n                tf.logging.info('distributed mode is enabled.'\n                                'size:{} rank:{}'.format(hvd.size(), hvd.rank()))\n                # https://github.com/uber/horovod/issues/223\n                d = d.shard(hvd.size(), hvd.rank())\n\n                d = d.repeat()\n                d = d.shuffle(buffer_size=100)\n                d = d.apply(\n                    tf.contrib.data.map_and_batch(\n                        lambda record: _decode_record(record, name_to_features),\n                        batch_size=batch_size//hvd.size(),\n                        drop_remainder=drop_remainder))\n            else:\n                tf.logging.info('distributed mode is not enabled.')\n                d = d.repeat()\n                d = d.shuffle(buffer_size=100)\n                d = d.apply(\n                    tf.contrib.data.map_and_batch(\n                        lambda record: _decode_record(record, name_to_features),\n                        batch_size=batch_size,\n                        drop_remainder=drop_remainder))\n\n        else:\n            d = d.apply(\n                tf.contrib.data.map_and_batch(\n                    lambda record: _decode_record(record, name_to_features),\n                    
batch_size=batch_size,\n                    drop_remainder=drop_remainder))\n\n        return d\n    return input_fn\n  \n  \ndef get_dataset(processor,\n                tokenizer,\n                data_dir,\n                max_seq_length_src,\n                max_seq_length_tgt,\n                batch_size,\n                mode,\n                output_dir,\n                is_distributed=False):\n    \"\"\"\n    Args:\n        processor: Data Preprocessor, must have get_lables,\n            get_train/dev/test/examples methods defined.\n        tokenizer: The Sentence Tokenizer. Generally should be\n            SentencePiece Model.\n        data_dir: The input data directory.\n        max_seq_length: Max sequence length.\n        batch_size: mini-batch size.\n        model: `train`, `eval` or `test`.\n        output_dir: The directory to save the TFRecords in.\n    \"\"\"\n    #label_list = processor.get_labels()\n    if mode == 'train':\n        train_examples = processor.get_train_examples(data_dir)\n        train_file = os.path.join(output_dir, \"train.tf_record\")\n        \n        file_based_convert_examples_to_features(\n            train_examples, max_seq_length_src,max_seq_length_tgt,\n            tokenizer, train_file)\n        dataset = file_based_input_fn_builder(\n            input_file=train_file,\n            max_seq_length_src=max_seq_length_src,\n            max_seq_length_tgt =max_seq_length_tgt,\n            is_training=True,\n            drop_remainder=True,\n            is_distributed=is_distributed)({'batch_size': batch_size})\n    elif mode == 'eval':\n        eval_examples = processor.get_dev_examples(data_dir)\n        eval_file = os.path.join(output_dir, \"eval.tf_record\")\n        \n        file_based_convert_examples_to_features(\n            eval_examples, max_seq_length_src,max_seq_length_tgt,\n            tokenizer, eval_file)\n        dataset = file_based_input_fn_builder(\n            input_file=eval_file,\n            
max_seq_length_src=max_seq_length_src,\n            max_seq_length_tgt =max_seq_length_tgt,\n            is_training=False,\n            drop_remainder=True,\n            is_distributed=is_distributed)({'batch_size': batch_size})\n    elif mode == 'test':\n      \n        test_examples = processor.get_test_examples(data_dir)\n        test_file = os.path.join(output_dir, \"predict.tf_record\")\n        \n        \n        file_based_convert_examples_to_features(\n            test_examples, max_seq_length_src,max_seq_length_tgt,\n            tokenizer, test_file)\n        dataset = file_based_input_fn_builder(\n            input_file=test_file,\n            max_seq_length_src=max_seq_length_src,\n            max_seq_length_tgt =max_seq_length_tgt,\n            is_training=False,\n            drop_remainder=True,\n            is_distributed=is_distributed)({'batch_size': batch_size})\n    return dataset\n\n\nif __name__==\"__main__\":\n    tokenizer = tokenization.FullTokenizer(\n        vocab_file=os.path.join(bert_pretrain_dir, 'vocab.txt'),\n        do_lower_case=True)\n\n    vocab_size = len(tokenizer.vocab)\n\n    processor = CNNDailymail()\n    train_dataset = get_dataset(processor,tokenizer,data_dir,max_seq_length_src,max_seq_length_tgt,batch_size,'train',data_dir)\n    eval_dataset = get_dataset(processor,tokenizer,data_dir,max_seq_length_src,max_seq_length_tgt,eval_batch_size,'eval',data_dir)\n    "
  },
  {
    "path": "texar_repo/.gitignore",
    "content": "# Created by https://www.gitignore.io/api/python\n\n### Python ###\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nenv/\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*,cover\n.hypothesis/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# dotenv\n.env\n\n# virtualenv\n.venv\nvenv/\nENV/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n\n### Linux ###\n*~\n\n# temporary files which can be created if a process still has a handle open of a deleted file\n.fuse_hidden*\n\n# KDE directory preferences\n.directory\n\n# Linux trash folder which might appear on any partition or disk\n.Trash-*\n\n# .nfs files are created when an open file is removed but is still being accessed\n.nfs*\n\n\n### macOS ###\n*.DS_Store\n.AppleDouble\n.LSOverride\n\n# Icon must end with two \\r\nIcon\n\n# Thumbnails\n._*\n\n# Files that might appear in the root of a volume\n.DocumentRevisions-V100\n.fseventsd\n.Spotlight-V100\n.TemporaryItems\n.Trashes\n.VolumeIcon.icns\n.com.apple.timemachine.donotpresent\n\n# Directories potentially 
created on remote AFP share\n.AppleDB\n.AppleDesktop\nNetwork Trash Folder\nTemporary Items\n.apdisk\n\n\n### Vim ###\n# swap\n[._]*.s[a-v][a-z]\n[._]*.sw[a-p]\n[._]s[a-v][a-z]\n[._]sw[a-p]\n# session\nSession.vim\n# temporary\n.netrwhist\n# auto-generated tag files\ntags\n\n\n### Emacs ###\n# -*- mode: gitignore; -*-\n\\#*\\#\n/.emacs.desktop\n/.emacs.desktop.lock\n*.elc\nauto-save-list\ntramp\n.\\#*\n\n# Org-mode\n.org-id-locations\n*_archive\n\n# flymake-mode\n*_flymake.*\n\n# eshell files\n/eshell/history\n/eshell/lastdir\n\n# elpa packages\n/elpa/\n\n# reftex files\n*.rel\n\n# AUCTeX auto folder\n/auto/\n\n# cask packages\n.cask/\n\n# Flycheck\nflycheck_*.el\n\n# server auth directory\n/server/\n\n# projectiles files\n.projectile\n\n# directory configuration\n.dir-locals.el\n\n# Editors\n.idea\n.vscode\n\ndocs/_build\n\n\n### IntelliJ ###\n*.iml\n\n### pytest ###\n/.pytest_cache/\n\n### Project ###\n/data/\ncheckpoints/\n/language_models/\n/examples/language_model_ptb/simple-examples/\nsimple-examples.tgz\n/examples/hierarchical_dialog/data/\n/examples/sequence_tagging/data/\n/examples/sequence_tagging/tmp/\n/examples/sentence_classifier/data/\n/examples/seq2seq_attn/data/\n/examples/seq2seq_attn/data.zip\n/examples/seq2seq_attn/iwslt14.zip\n/examples/seq2seq_attn/toy_copy.zip\n/examples/seq2seq_rl/data/\n/examples/seq2seq_rl/data.zip\n/examples/seq2seq_rl/iwslt14.zip\n/examples/seq2seq_rl/toy_copy.zip\n/examples/seq2seq_configs/data/\n/examples/seq2seq_configs/data.zip\n/examples/seq2seq_config/iwslt14.zip\n/examples/seq2seq_config/toy_copy.zip\n/examples/seq2seq_exposure_bias/data/\n/examples/text_style_transfer/checkpoints/\n/examples/text_style_transfer/samples/\n/examples/text_style_transfer/data/\n/examples/text_style_transfer/yelp.zip\n/examples/vae_text/simple-examples/\n/examples/vae_text/data/\n/examples/transformer/data/\n/examples/transformer/temp/\n/examples/transformer/outputs/\n/examples/bert/data/\n!/examples/bert/data/download_glue_data.py\n!/e
xamples/bert/data/README.md\n/examples/bert/bert_pretrained_models/\n!/examples/bert/bert_pretrained_models/download_model.sh\n/examples/bert/output\n"
  },
  {
    "path": "texar_repo/.pylintrc",
    "content": "[MASTER]\n\n# Specify a configuration file.\n#rcfile=\n\n# Python code to execute, usually for sys.path manipulation such as\n# pygtk.require().\n#init-hook=\n\n# Add files or directories to the blacklist. They should be base names, not\n# paths.\nignore=CVS\n\n# Add files or directories matching the regex patterns to the blacklist. The\n# regex matches against base names, not paths.\nignore-patterns=\n\n# Pickle collected data for later comparisons.\npersistent=yes\n\n# List of plugins (as comma separated values of python modules names) to load,\n# usually to register additional checkers.\nload-plugins=\n\n# Use multiple processes to speed up Pylint.\njobs=1\n\n# Allow loading of arbitrary C extensions. Extensions are imported into the\n# active Python interpreter and may run arbitrary code.\nunsafe-load-any-extension=no\n\n# A comma-separated list of package or module names from where C extensions may\n# be loaded. Extensions are loading into the active Python interpreter and may\n# run arbitrary code\nextension-pkg-whitelist=\n\n# Allow optimization of some AST trees. This will activate a peephole AST\n# optimizer, which will apply various small optimizations. For instance, it can\n# be used to obtain the result of joining multiple strings with the addition\n# operator. Joining a lot of strings can lead to a maximum recursion error in\n# Pylint and this flag can prevent that. It has one side effect, the resulting\n# AST will be different than the one from reality. This option is deprecated\n# and it will be removed in Pylint 2.0.\noptimize-ast=no\n\n\n[MESSAGES CONTROL]\n\n# Only show warnings with the listed confidence levels. Leave empty to show\n# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED\nconfidence=\n\n# Enable the message, report, category or checker with the given id(s). 
You can\n# either give multiple identifier separated by comma (,) or put this option\n# multiple time (only on the command line, not in the configuration file where\n# it should appear only once). See also the \"--disable\" option for examples.\n#enable=\n\n# Disable the message, report, category or checker with the given id(s). You\n# can either give multiple identifiers separated by comma (,) or put this\n# option multiple times (only on the command line, not in the configuration\n# file where it should appear only once).You can also use \"--disable=all\" to\n# disable everything first and then reenable specific checks. For example, if\n# you want to run only the similarities checker, you can use \"--disable=all\n# --enable=similarities\". If you want to run only the classes checker, but have\n# no Warning level messages displayed, use\"--disable=all --enable=classes\n# --disable=W\"\ndisable=print-statement,parameter-unpacking,unpacking-in-except,old-raise-syntax,backtick,import-star-module-level,apply-builtin,basestring-builtin,buffer-builtin,cmp-builtin,coerce-builtin,execfile-builtin,file-builtin,long-builtin,raw_input-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,no-absolute-import,old-division,dict-iter-method,dict-view-method,next-method-called,metaclass-assignment,indexing-exception,raising-string,reload-builtin,oct-method,hex-method,nonzero-method,cmp-method,input-builtin,round-builtin,intern-builtin,unichr-builtin,map-builtin-not-iterating,zip-builtin-not-iterating,range-builtin-not-iterating,filter-builtin-not-iterating,using-cmp-argument,long-suffix,old-ne-operator,old-octal-literal,suppressed-message,useless-suppression\n\n\n[REPORTS]\n\n# Set the output format. Available formats are text, parseable, colorized, msvs\n# (visual studio) and html. 
You can also give a reporter class, eg\n# mypackage.mymodule.MyReporterClass.\noutput-format=text\n\n# Put messages in a separate file for each module / package specified on the\n# command line instead of printing them on stdout. Reports (if any) will be\n# written in a file name \"pylint_global.[txt|html]\". This option is deprecated\n# and it will be removed in Pylint 2.0.\nfiles-output=no\n\n# Tells whether to display a full report or only the messages\nreports=yes\n\n# Python expression which should return a note less than 10 (10 is the highest\n# note). You have access to the variables errors warning, statement which\n# respectively contain the number of errors / warnings messages and the total\n# number of statements analyzed. This is used by the global evaluation report\n# (RP0004).\nevaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)\n\n# Template used to display messages. This is a python new-style format string\n# used to format the message information. See doc for all details\n#msg-template=\n\n\n[BASIC]\n\n# Good variable names which should always be accepted, separated by a comma\ngood-names=i,j,k,ex,Run,_\n\n# Bad variable names which should always be refused, separated by a comma\nbad-names=foo,bar,baz,toto,tutu,tata\n\n# Colon-delimited sets of names that determine each other's naming style when\n# the name regexes allow several styles.\nname-group=\n\n# Include a hint for the correct naming format with invalid-name\ninclude-naming-hint=no\n\n# List of decorators that produce properties, such as abc.abstractproperty. 
Add\n# to this list to register other decorators that produce valid properties.\nproperty-classes=abc.abstractproperty\n\n# Regular expression matching correct module names\nmodule-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$\n\n# Naming hint for module names\nmodule-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$\n\n# Regular expression matching correct constant names\nconst-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$\n\n# Naming hint for constant names\nconst-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$\n\n# Regular expression matching correct class names\nclass-rgx=[A-Z_][a-zA-Z0-9]+$\n\n# Naming hint for class names\nclass-name-hint=[A-Z_][a-zA-Z0-9]+$\n\n# Regular expression matching correct function names\nfunction-rgx=[a-z_][a-z0-9_]{2,30}$\n\n# Naming hint for function names\nfunction-name-hint=[a-z_][a-z0-9_]{2,30}$\n\n# Regular expression matching correct method names\nmethod-rgx=[a-z_][a-z0-9_]{2,30}$\n\n# Naming hint for method names\nmethod-name-hint=[a-z_][a-z0-9_]{2,30}$\n\n# Regular expression matching correct attribute names\nattr-rgx=[a-z_][a-z0-9_]{2,30}$\n\n# Naming hint for attribute names\nattr-name-hint=[a-z_][a-z0-9_]{2,30}$\n\n# Regular expression matching correct argument names\nargument-rgx=[a-z_][a-z0-9_]{2,30}$\n\n# Naming hint for argument names\nargument-name-hint=[a-z_][a-z0-9_]{2,30}$\n\n# Regular expression matching correct variable names\nvariable-rgx=[a-z_][a-z0-9_]{2,30}$\n\n# Naming hint for variable names\nvariable-name-hint=[a-z_][a-z0-9_]{2,30}$\n\n# Regular expression matching correct class attribute names\nclass-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$\n\n# Naming hint for class attribute names\nclass-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$\n\n# Regular expression matching correct inline iteration names\ninlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$\n\n# Naming hint for inline iteration names\ninlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$\n\n# Regular expression which should only match function or class 
names that do\n# not require a docstring.\nno-docstring-rgx=^_\n\n# Minimum line length for functions/classes that require docstrings, shorter\n# ones are exempt.\ndocstring-min-length=-1\n\n\n[ELIF]\n\n# Maximum number of nested blocks for function / method body\nmax-nested-blocks=5\n\n\n[TYPECHECK]\n\n# Tells whether missing members accessed in mixin class should be ignored. A\n# mixin class is detected if its name ends with \"mixin\" (case insensitive).\nignore-mixin-members=yes\n\n# List of module names for which member attributes should not be checked\n# (useful for modules/projects where namespaces are manipulated during runtime\n# and thus existing member attributes cannot be deduced by static analysis. It\n# supports qualified module names, as well as Unix pattern matching.\nignored-modules=\n\n# List of class names for which member attributes should not be checked (useful\n# for classes with dynamically set attributes). This supports the use of\n# qualified names.\nignored-classes=optparse.Values,thread._local,_thread._local\n\n# List of members which are set dynamically and missed by pylint inference\n# system, and so shouldn't trigger E1101 when accessed. Python regular\n# expressions are accepted.\ngenerated-members=\n\n# List of decorators that produce context managers, such as\n# contextlib.contextmanager. Add to this list to register other decorators that\n# produce valid context managers.\ncontextmanager-decorators=contextlib.contextmanager\n\n\n[SPELLING]\n\n# Spelling dictionary name. Available dictionaries: none. 
To make it working\n# install python-enchant package.\nspelling-dict=\n\n# List of comma separated words that should not be checked.\nspelling-ignore-words=\n\n# A path to a file that contains private dictionary; one word per line.\nspelling-private-dict-file=\n\n# Tells whether to store unknown words to indicated private dictionary in\n# --spelling-private-dict-file option instead of raising a message.\nspelling-store-unknown-words=no\n\n\n[MISCELLANEOUS]\n\n# List of note tags to take in consideration, separated by a comma.\nnotes=FIXME,XXX,TODO\n\n\n[SIMILARITIES]\n\n# Minimum lines number of a similarity.\nmin-similarity-lines=4\n\n# Ignore comments when computing similarities.\nignore-comments=yes\n\n# Ignore docstrings when computing similarities.\nignore-docstrings=yes\n\n# Ignore imports when computing similarities.\nignore-imports=no\n\n\n[VARIABLES]\n\n# Tells whether we should check for unused import in __init__ files.\ninit-import=no\n\n# A regular expression matching the name of dummy variables (i.e. expectedly\n# not used).\ndummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy\n\n# List of additional names supposed to be defined in builtins. Remember that\n# you should avoid to define new builtins when possible.\nadditional-builtins=\n\n# List of strings which can identify a callback function by name. 
A callback\n# name must start or end with one of those strings.\ncallbacks=cb_,_cb\n\n# List of qualified module names which can have objects that can redefine\n# builtins.\nredefining-builtins-modules=six.moves,future.builtins\n\n\n[LOGGING]\n\n# Logging modules to check that the string format arguments are in logging\n# function parameter format\nlogging-modules=logging\n\n\n[FORMAT]\n\n# Maximum number of characters on a single line.\nmax-line-length=80\n\n# Regexp for a line that is allowed to be longer than the limit.\nignore-long-lines=^\\s*(# )?<?https?://\\S+>?$\n\n# Allow the body of an if to be on the same line as the test if there is no\n# else.\nsingle-line-if-stmt=no\n\n# List of optional constructs for which whitespace checking is disabled. `dict-\n# separator` is used to allow tabulation in dicts, etc.: {1  : 1,\\n222: 2}.\n# `trailing-comma` allows a space between comma and closing bracket: (a, ).\n# `empty-line` allows space-only lines.\nno-space-check=trailing-comma,dict-separator\n\n# Maximum number of lines in a module\nmax-module-lines=1000\n\n# String used as indentation unit. This is usually \"    \" (4 spaces) or \"\\t\" (1\n# tab).\nindent-string='    '\n\n# Number of spaces of indent required inside a hanging  or continued line.\nindent-after-paren=4\n\n# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.\nexpected-line-ending-format=\n\n\n[DESIGN]\n\n# Maximum number of arguments for function / method\nmax-args=5\n\n# Argument names that match this expression will be ignored. 
Default to name\n# with leading underscore\nignored-argument-names=_.*\n\n# Maximum number of locals for function / method body\nmax-locals=15\n\n# Maximum number of return / yield for function / method body\nmax-returns=6\n\n# Maximum number of branch for function / method body\nmax-branches=12\n\n# Maximum number of statements in function / method body\nmax-statements=50\n\n# Maximum number of parents for a class (see R0901).\nmax-parents=7\n\n# Maximum number of attributes for a class (see R0902).\nmax-attributes=7\n\n# Minimum number of public methods for a class (see R0903).\nmin-public-methods=2\n\n# Maximum number of public methods for a class (see R0904).\nmax-public-methods=20\n\n# Maximum number of boolean expressions in a if statement\nmax-bool-expr=5\n\n\n[CLASSES]\n\n# List of method names used to declare (i.e. assign) instance attributes.\ndefining-attr-methods=__init__,__new__,setUp\n\n# List of valid names for the first argument in a class method.\nvalid-classmethod-first-arg=cls\n\n# List of valid names for the first argument in a metaclass class method.\nvalid-metaclass-classmethod-first-arg=mcs\n\n# List of member names, which should be excluded from the protected access\n# warning.\nexclude-protected=_asdict,_fields,_replace,_source,_make\n\n\n[IMPORTS]\n\n# Deprecated modules which should not be used, separated by a comma\ndeprecated-modules=optparse\n\n# Create a graph of every (i.e. 
internal and external) dependencies in the\n# given file (report RP0402 must not be disabled)\nimport-graph=\n\n# Create a graph of external dependencies in the given file (report RP0402 must\n# not be disabled)\next-import-graph=\n\n# Create a graph of internal dependencies in the given file (report RP0402 must\n# not be disabled)\nint-import-graph=\n\n# Force import order to recognize a module as part of the standard\n# compatibility libraries.\nknown-standard-library=\n\n# Force import order to recognize a module as part of a third party library.\nknown-third-party=enchant\n\n# Analyse import fallback blocks. This can be used to support both Python 2 and\n# 3 compatible code, which means that the block might have code that exists\n# only in one or another interpreter, leading to false positives when analysed.\nanalyse-fallback-blocks=no\n\n\n[EXCEPTIONS]\n\n# Exceptions that will emit a warning when being caught. Defaults to\n# \"Exception\"\novergeneral-exceptions=Exception\n"
  },
  {
    "path": "texar_repo/.travis.yml",
    "content": "sudo: required\nlanguage: python\npython:\n  - \"2.7\"\n  - \"3.5\"\n  - \"3.6\"\n\ninstall:\n  - pip install -e .[tensorflow-cpu]\n  - pip install flake8\n\nbefore_script:\n  # stop the build if there are Python syntax errors or undefined names\n  - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics\n  # exit-zero treats all errors as warnings. Texar limits lines to a maximum of 80 chars. \n  - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics\n\nscript:\n  # units test\n  - pytest\n\nnotifications:\n  email: false\n"
  },
  {
    "path": "texar_repo/CHANGELOG.md",
    "content": "\n## [Unreleased]\n\n### New features\n\n* [2019-01-02] Support distributed-GPU training. See the [example](https://github.com/asyml/texar/tree/master/examples/distributed_gpu) \n* [2018-11-29] Support pre-trained BERT model. See the [example](https://github.com/asyml/texar/tree/master/examples/bert) \n"
  },
  {
    "path": "texar_repo/LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"{}\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright {yyyy} {name of copyright owner}\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "texar_repo/README.md",
    "content": "<div align=\"center\">\n   <img src=\"https://zhitinghu.github.io/texar_web/images/logo_h_035.png\"><br><br>\n</div>\n \n-----------------\n\n[![Build Status](https://travis-ci.org/asyml/texar.svg?branch=master)](https://travis-ci.org/asyml/texar)\n[![Documentation Status](https://readthedocs.org/projects/texar/badge/?version=latest)](https://texar.readthedocs.io/en/latest/?badge=latest)\n[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/asyml/texar/blob/master/LICENSE)\n \n**Texar** is an open-source toolkit based on TensorFlow, aiming to support a broad set of machine learning especially **text generation tasks**, such as machine translation, dialog, summarization, content manipulation, language modeling, and so on. Texar is designed for both researchers and practitioners for fast prototyping and experimentation.\n \nWith the design goals of **modularity, versatility, and extensibility** in mind, Texar extracts the common patterns underlying the diverse tasks and methodologies, creates a library of highly reusable modules and functionalities, and facilitates **arbitrary model architectures and algorithmic paradigms**, e.g., \n   * encoder(s) to decoder(s), sequential- and self-attentions, memory, hierarchical models, classifiers... \n   * maximum likelihood learning, reinforcement learning, adversarial learning, probabilistic modeling, ... \n\nWith Texar, cutting-edge complex models can be easily constructed, freely enriched with best modeling/training practices, readily fitted into standard training/evaluation pipelines, and fastly experimented and evolved by, e.g., plugging-in and swapping-out different modules.\n\n<div align=\"center\">\n   <img src=\"https://zhitinghu.github.io/texar_web/images/texar_stack.png\"><br><br>\n</div> \n\n### Key Features\n* **Versatility**. 
Texar contains a wide range of modules and functionalities for composing arbitrary model architectures and implementing various learning algorithms, as well as for data processing, evaluation, prediction, etc.\n* **Modularity**. Texar decomposes diverse complex machine learning models/algorithms into a set of highly-reusable modules. In particular, model **architecture, losses, and learning processes** are fully decomposed.  \nUsers can construct their own models at a high conceptual level just like assembling building blocks. It is convenient to plug-ins or swap-out modules, and configure rich options of each module. For example, switching between maximum likelihood learning and reinforcement learning involves only changing several lines of code.\n* **Extensibility**. It is straightforward to integrate any user-customized, external modules. Also, Texar is fully compatible with the native TensorFlow interfaces and can take advantage of the rich TensorFlow features, and resources from the vibrant open-source community.\n* Interfaces with different functionality levels. Users can customize a model through 1) simple **Python/YAML configuration files** of provided model templates/examples; 2) programming with **Python Library APIs** for maximal customizability.\n* Easy-to-use APIs: 1) Convenient automatic variable re-use---no worry about the complicated TF variable scopes; 2) PyTorch-like callable modules; 3) Rich configuration options for each module, all with default values; ...\n* Well-structured high-quality code of uniform design patterns and consistent styles. 
\n* Clean, detailed [documentation](https://texar.readthedocs.io) and rich [examples](./examples).\n* **Distributed model training** with multiple GPUs.\n\n### Library API Example\nBuilds a (self-)attentional sequence encoder-decoder model, with different learning algorithms:\n```python\nimport texar as tx\n\n# Data \ndata = tx.data.PairedTextData(hparams=hparams_data) # Hyperparameter configs in `hparams` \niterator = tx.data.DataIterator(data)\nbatch = iterator.get_next() # A data mini-batch\n\n# Model architecture\nembedder = tx.modules.WordEmbedder(data.target_vocab.size, hparams=hparams_emb)\nencoder = tx.modules.TransformerEncoder(hparams=hparams_encoder)\noutputs_enc = encoder(inputs=embedder(batch['source_text_ids']),\n                      sequence_length=batch['source_length'])\n                      \ndecoder = tx.modules.AttentionRNNDecoder(memory=output_enc, \n                                         memory_sequence_length=batch['source_length'],\n                                         hparams=hparams_decoder)\noutputs, _, _ = decoder(inputs=embedder(batch['target_text_ids']),\n                        sequence_length=batch['target_length']-1)\n                        \n# Loss for maximum likelihood learning\nloss = tx.losses.sequence_sparse_softmax_cross_entropy(\n    labels=batch['target_text_ids'][:, 1:],\n    logits=outputs.logits,\n    sequence_length=batch['target_length']-1) # Automatic masks\n\n# Beam search decoding\noutputs_bs, _, _ = tx.modules.beam_search_decode(\n    decoder,\n    embedding=embedder,\n    start_tokens=[data.target_vocab.bos_token_id]*num_samples,\n    end_token=data.target_vocab.eos_token_id)\n```\n```python\n# Policy gradient agent for RL learning\nagent = tx.agents.SeqPGAgent(samples=outputs.sample_id,\n                             logits=outputs.logits,\n                             sequence_length=batch['target_length']-1,\n                             hparams=config_model.agent)\n```\nMany more examples are available 
[here](./examples)\n  \n### Installation\n```\ngit clone https://github.com/asyml/texar.git\ncd texar\npip install -e .\n```\n\n### Getting Started\n* [Examples](./examples)\n* [Documentation](https://texar.readthedocs.io)\n\n### Reference\nIf you use Texar, please cite the [report](https://arxiv.org/abs/1809.00794) with the following BibTex entry:\n```\nTexar: A Modularized, Versatile, and Extensible Toolkit for Text Generation\nZhiting Hu, Haoran Shi, Zichao Yang, Bowen Tan, Tiancheng Zhao, Junxian He, Wentao Wang, Lianhui Qin, Di Wang, Xuezhe Ma, Hector Liu, Xiaodan Liang, Wanrong Zhu, Devendra Singh Sachan, Eric P. Xing\n2018\n\n@article{hu2018texar,\n  title={Texar: A Modularized, Versatile, and Extensible Toolkit for Text Generation},\n  author={Hu, Zhiting and Shi, Haoran and Yang, Zichao and Tan, Bowen and Zhao, Tiancheng and He, Junxian and Wang, Wentao and Qin, Lianhui and Wang, Di and others},\n  journal={arXiv preprint arXiv:1809.00794},\n  year={2018}\n}\n```\n\n### License\n[Apache License 2.0](./LICENSE)\n"
  },
  {
    "path": "texar_repo/bin/average_checkpoints.py",
    "content": "\"\"\"Checkpoint averaging script.\"\"\"\n\n# This script is modified version of\n# https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/bin/t2t_avg_all.py\n# which comes with the following license and copyright notice:\n\n# Copyright 2017 The Tensor2Tensor Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nimport os\nimport argparse\nimport six\n\nimport tensorflow as tf\nimport numpy as np\n\n\ndef main():\n  tf.logging.set_verbosity(tf.logging.INFO)\n\n  parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)\n  parser.add_argument(\"--model_dir\", required=True,\n                      help=\"The model directory containing the checkpoints.\")\n  parser.add_argument(\"--output_dir\", required=True,\n                      help=\"The output directory where the averaged checkpoint will be saved.\")\n  parser.add_argument(\"--max_count\", type=int, default=8,\n                      help=\"The maximal number of checkpoints to average.\")\n  args = parser.parse_args()\n\n  if args.model_dir == args.output_dir:\n    raise ValueError(\"Model and output directory must be different\")\n\n  checkpoints_path = tf.train.get_checkpoint_state(args.model_dir).all_model_checkpoint_paths\n  if len(checkpoints_path) > args.max_count:\n    checkpoints_path = checkpoints_path[-args.max_count:]\n  num_checkpoints = len(checkpoints_path)\n\n  tf.logging.info(\"Averaging %d checkpoints...\" % 
num_checkpoints)\n  tf.logging.info(\"Listing variables...\")\n\n  var_list = tf.train.list_variables(checkpoints_path[0])\n  avg_values = {}\n  for name, shape in var_list:\n    if not name.startswith(\"global_step\"):\n      avg_values[name] = np.zeros(shape)\n\n  for checkpoint_path in checkpoints_path:\n    tf.logging.info(\"Loading checkpoint %s\" % checkpoint_path)\n    reader = tf.train.load_checkpoint(checkpoint_path)\n    for name in avg_values:\n      avg_values[name] += reader.get_tensor(name) / num_checkpoints\n\n  tf_vars = []\n  for name, value in six.iteritems(avg_values):\n    tf_vars.append(tf.get_variable(name, shape=value.shape))\n  placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars]\n  assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)]\n\n  latest_step = int(checkpoints_path[-1].split(\"-\")[-1])\n  out_base_file = os.path.join(args.output_dir, \"model.ckpt\")\n  global_step = tf.get_variable(\n      \"global_step\",\n      initializer=tf.constant(latest_step, dtype=tf.int64),\n      trainable=False)\n  saver = tf.train.Saver(tf.global_variables())\n\n  with tf.Session() as sess:\n    sess.run(tf.global_variables_initializer())\n    for p, assign_op, (name, value) in zip(placeholders, assign_ops, six.iteritems(avg_values)):\n      sess.run(assign_op, {p: value})\n    tf.logging.info(\"Saving averaged checkpoint to %s-%d\" % (out_base_file, latest_step))\n    saver.save(sess, out_base_file, global_step=global_step)\n\n\nif __name__ == \"__main__\":\n  main()\n"
  },
  {
    "path": "texar_repo/bin/train.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Main script for model training.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tempfile\nimport yaml\n\nimport tensorflow as tf\n\nfrom texar import utils\nfrom texar.run import Executor\n\n\ntf.flags.DEFINE_string(\"config_paths\", \"\",\n                       \"Paths to configuration files. This can be a path to a \"\n                       \"directory in which all files are loaded, or paths to \"\n                       \"multiple files separated by commas. Setting a key in \"\n                       \"these files is equivalent to setting the FLAG value \"\n                       \"with the same name. 
If a key is set in both config \"\n                       \"files and FLAG, the value in config files is used.\")\n\ntf.flags.DEFINE_string(\"model\", \"\",\n                       \"Name of the model class.\")\ntf.flags.DEFINE_string(\"model_hparams\", \"{}\",\n                       \"YAML configuration string for the model \"\n                       \"hyper-parameters.\")\n\ntf.flags.DEFINE_string(\"data_hparams_train\", \"{}\",\n                       \"YAML configuration string for the training data \"\n                       \"hyper-parameters.\")\ntf.flags.DEFINE_string(\"data_hparams_eval\", \"{}\",\n                       \"YAML configuration string for the evaluation data \"\n                       \"hyper-parameters.\")\n\ntf.flags.DEFINE_integer(\"max_train_steps\", None,\n                        \"Maximum number of training steps to run. \"\n                        \"If None, train forever or until the train data \"\n                        \"generates the OutOfRange exception. If OutOfRange \"\n\t\t\t\"occurs in the middle, training stops before \"\n\t\t\t\"max_train_steps steps.\")\ntf.flags.DEFINE_integer(\"eval_steps\", None,\n                        \"Maximum number of evaluation steps to run. \"\n                        \"If None, evaluate until the eval data raises an \"\n                        \"OutOfRange exception.\")\n\n# RunConfig\ntf.flags.DEFINE_string(\"model_dir\", None,\n                       \"The directory where model parameters, graph, \"\n                       \"summeries, etc are saved. If None, a local temporary \"\n                       \"directory is created.\")\ntf.flags.DEFINE_integer(\"tf_random_seed\", None,\n                        \"Random seed for TensorFlow initializers. 
Setting \"\n                        \"this value allows consistency between reruns.\")\ntf.flags.DEFINE_integer(\"save_summary_steps\", 100,\n                        \"Save summaries every this many steps.\")\ntf.flags.DEFINE_integer(\"save_checkpoints_steps\", None,\n                        \"Save checkpoints every this many steps. \"\n                        \"Can not be specified with save_checkpoints_secs.\")\ntf.flags.DEFINE_integer(\"save_checkpoints_secs\", None,\n                        \"Save checkpoints every this many seconds. \"\n                        \"Can not be specified with save_checkpoints_steps. \"\n                        \"Defaults to 600 seconds if both \"\n                        \"save_checkpoints_steps and save_checkpoints_secs \"\n                        \"are not set. If both are set to -1, then \"\n                        \"checkpoints are disabled.\")\ntf.flags.DEFINE_integer(\"keep_checkpoint_max\", 5,\n                        \"Maximum number of recent checkpoint files to keep. \"\n                        \"As new files are created, older files are deleted. \"\n                        \"If None or 0, all checkpoint files are kept.\")\ntf.flags.DEFINE_integer(\"keep_checkpoint_every_n_hours\", 10000,\n                        \"Number of hours between each checkpoint to be saved. 
\"\n                        \"The default value of 10,000 hours effectively \"\n                        \"disables the feature.\")\ntf.flags.DEFINE_integer(\"log_step_count_steps\", 100,\n                        \"The frequency, in number of global steps, that the \"\n                        \"global step/sec and the loss will be logged during \"\n                        \"training.\")\n# Session config\ntf.flags.DEFINE_float(\"per_process_gpu_memory_fraction\", 1.0,\n                      \"Fraction of the available GPU memory to allocate for \"\n                      \"each process.\")\ntf.flags.DEFINE_boolean(\"gpu_allow_growth\", False,\n                        \"If true, the allocator does not pre-allocate the \"\n                        \"entire specified GPU memory region, instead starting \"\n                        \"small and growing as needed.\")\ntf.flags.DEFINE_boolean(\"log_device_placement\", False,\n                        \"Whether device placements should be logged.\")\n\nFLAGS = tf.flags.FLAGS\n\ndef _process_config():\n    # Loads configs\n    config = utils.load_config(FLAGS.config_paths)\n\n    # Parses YAML FLAGS\n    FLAGS.model_hparams = yaml.load(FLAGS.model_hparams)\n    FLAGS.data_hparams_train = yaml.load(FLAGS.data_hparams_train)\n    FLAGS.data_hparams_eval = yaml.load(FLAGS.data_hparams_eval)\n\n    # Merges\n    final_config = {}\n    for flag_key in dir(FLAGS):\n        if flag_key in {'h', 'help', 'helpshort'}: # Filters out help flags\n            continue\n        flag_value = getattr(FLAGS, flag_key)\n        config_value = config.get(flag_key, None)\n        if isinstance(flag_value, dict) and isinstance(config_value, dict):\n            final_config[flag_key] = utils.dict_patch(config_value, flag_value)\n        elif flag_key in config:\n            final_config[flag_key] = config_value\n        else:\n            final_config[flag_key] = flag_value\n\n    # Processes\n    if final_config['model_dir'] is None:\n        
final_config['model_dir'] = tempfile.mkdtemp()\n\n    if final_config['save_checkpoints_steps'] is None \\\n            and final_config['save_checkpoints_secs'] is None:\n        final_config['save_checkpoints_secs'] = 600\n    if final_config['save_checkpoints_steps'] == -1 \\\n            and final_config['save_checkpoints_secs'] == -1:\n        final_config['save_checkpoints_steps'] = None\n        final_config['save_checkpoints_secs'] = None\n\n    tf.logging.info(\"Final Config:\\n%s\", yaml.dump(final_config))\n\n    return final_config\n\ndef _get_run_config(config):\n    gpu_options = tf.GPUOptions(\n        per_process_gpu_memory_fraction=\\\n                config['per_process_gpu_memory_fraction'],\n        allow_growth=config['gpu_allow_growth'])\n    sess_config = tf.ConfigProto(\n        gpu_options=gpu_options,\n        log_device_placement=config['log_device_placement'])\n\n    run_config = tf.estimator.RunConfig(\n        model_dir=config['model_dir'],\n        tf_random_seed=config['tf_random_seed'],\n        save_summary_steps=config['save_summary_steps'],\n        save_checkpoints_steps=config['save_checkpoints_steps'],\n        save_checkpoints_secs=config['save_checkpoints_secs'],\n        keep_checkpoint_max=config['keep_checkpoint_max'],\n        keep_checkpoint_every_n_hours=config['keep_checkpoint_every_n_hours'],\n        log_step_count_steps=config['log_step_count_steps'],\n        session_config=sess_config)\n\n    return run_config\n\ndef main(_):\n    \"\"\"The entrypoint.\"\"\"\n\n    config = _process_config()\n\n    run_config = _get_run_config(config)\n\n    kwargs = {\n        'data_hparams': config['data_hparams_train'],\n        'hparams': config['model_hparams']\n    }\n    model = utils.check_or_get_instance_with_redundant_kwargs(\n        config['model'], kwargs=kwargs,\n        module_paths=['texar.models', 'texar.custom'])\n\n    data_hparams = {\n        'train': config['data_hparams_train'],\n        'eval': 
config['data_hparams_eval']\n    }\n\n    exor = Executor(\n        model=model,\n        data_hparams=data_hparams,\n        config=run_config)\n\n    exor.train_and_evaluate(\n        max_train_steps=config['max_train_steps'],\n        eval_steps=config['eval_steps'])\n\nif __name__ == \"__main__\":\n    tf.logging.set_verbosity(tf.logging.INFO)\n    tf.app.run(main=main)\n"
  },
  {
    "path": "texar_repo/bin/utils/README.md",
    "content": "\nThis directory contains several utilities for, e.g., data pre-processing. \n\nInstructions of using BPE and WPM encoding are as follows. \nSee [examples/transformer](https://github.com/asyml/texar/tree/master/examples/transformer)\nfor a real example of using these encoding.\n\n### *[Byte Pair Encoding (BPE)](https://arxiv.org/abs/1508.07909)* pipeline\n\n* Add `bin` directory to `PATH` env variable\n```bash\nTEXAR=$(pwd)  \nexport PATH=$PATH:$TEXAR/bin\n```\n\n* Learning BPE vocab on source and target combined\n```bash\ncat train.src train.trg | learn_bpe -s 32000 > bpe-codes.32000\n```\n\n* Applying BPE on source and target files\n```bash\napply_bpe -c bpe-codes.32000 < train.src > train.src.bpe\napply_bpe -c bpe-codes.32000 < train.trg > train.trg.bpe\napply_bpe -c bpe-codes.32000 < dev.src > dev.src.bpe\napply_bpe -c bpe-codes.32000 < dev.trg > dev.trg.bpe\napply_bpe -c bpe-codes.32000 < test.src > test.src.bpe\n```\n\n* BPE decoding target to match with references\n```bash\nmv test.out test.out.bpe\ncat test.out.bpe | sed -E 's/(@@ )|(@@ ?$)//g' > test.out\n```\n\n##### Evaluate Using t2t-bleu\n```bash\nt2t-bleu --translation=test.out --reference=test.tgt\n```\n\n### Word Piece Model (WPM) pipeline\n\n* This requires installation of *[sentencepiece](https://github.com/google/sentencepiece#python-module) library\n```bash\npip install sentencepiece\n```\n* Learning Word Piece on source and target combined\n```bash\nspm_train --input=train.src,train.tgt --vocab_size 32000 --model_prefix=wpm-codes\n```\n\n* Applying Word Piece on source and target\n```bash\nspm_encode --model wpm-codes.model --output_format=id < train.src > train.src.wpm\nspm_encode --model wpm-codes.model --output_format=id < train.tgt > train.tgt.wpm\nspm_encode --model wpm-codes.model --output_format=id < valid.src > valid.src.wpm\nspm_encode --model wpm-codes.model --output_format=id < valid.tgt > valid.tgt.wpm\nspm_encode --model wpm-codes.model --output_format=id < test.src 
> test.src.wpm\n```\n\n* WPM decoding/detokenising target to match with references\n```bash\nmv test.out test.wpm\nspm_decode --model wpm-codes.model --input_format=id < test.out.wpm > test.out\n```\n"
  },
  {
    "path": "texar_repo/bin/utils/apply_bpe",
    "content": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n# Author: Rico Sennrich\n# flake8: noqa\n\n\"\"\"Use operations learned with learn_bpe to encode a new text.\nThe text will not be smaller, but use only a fixed vocabulary, with rare words\nencoded as variable-length sequences of subword units.\n\nReference:\nRico Sennrich, Barry Haddow and Alexandra Birch (2015). Neural Machine Translation of Rare Words with Subword Units.\nProceedings of the 54th Annual Meeting of the Association for Computational Linguistics (ACL 2016). Berlin, Germany.\n\"\"\"\n# This file is retrieved from https://github.com/rsennrich/subword-nmt\n\nfrom __future__ import unicode_literals, division\n\nimport sys\nimport codecs\nimport io\nimport argparse\nimport json\nimport re\nfrom collections import defaultdict\n\n# hack for python2/3 compatibility\nfrom io import open\nargparse.open = open\n\nclass BPE(object):\n\n    def __init__(self, codes, separator='@@', vocab=None, glossaries=None):\n\n        # check version information\n        firstline = codes.readline()\n        if firstline.startswith('#version:'):\n            self.version = tuple([int(x) for x in re.sub(r'(\\.0+)*$','', firstline.split()[-1]).split(\".\")])\n        else:\n            self.version = (0, 1)\n            codes.seek(0)\n\n        self.bpe_codes = [tuple(item.split()) for item in codes]\n\n        # some hacking to deal with duplicates (only consider first instance)\n        self.bpe_codes = dict([(code,i) for (i,code) in reversed(list(enumerate(self.bpe_codes)))])\n\n        self.bpe_codes_reverse = dict([(pair[0] + pair[1], pair) for pair,i in self.bpe_codes.items()])\n\n        self.separator = separator\n\n        self.vocab = vocab\n\n        self.glossaries = glossaries if glossaries else []\n\n        self.cache = {}\n\n    def segment(self, sentence):\n        \"\"\"segment single sentence (whitespace-tokenized string) with BPE encoding\"\"\"\n        output = []\n        for word in 
sentence.split():\n            new_word = [out for segment in self._isolate_glossaries(word)\n                        for out in encode(segment,\n                                          self.bpe_codes,\n                                          self.bpe_codes_reverse,\n                                          self.vocab,\n                                          self.separator,\n                                          self.version,\n                                          self.cache,\n                                          self.glossaries)]\n\n            for item in new_word[:-1]:\n                output.append(item + self.separator)\n            output.append(new_word[-1])\n\n        return ' '.join(output)\n\n    def _isolate_glossaries(self, word):\n        word_segments = [word]\n        for gloss in self.glossaries:\n            word_segments = [out_segments for segment in word_segments\n                                 for out_segments in isolate_glossary(segment, gloss)]\n        return word_segments\n\ndef create_parser():\n    parser = argparse.ArgumentParser(\n        formatter_class=argparse.RawDescriptionHelpFormatter,\n        description=\"learn BPE-based word segmentation\")\n\n    parser.add_argument(\n        '--input', '-i', type=argparse.FileType('r'), default=sys.stdin,\n        metavar='PATH',\n        help=\"Input file (default: standard input).\")\n    parser.add_argument(\n        '--codes', '-c', type=argparse.FileType('r'), metavar='PATH',\n        required=True,\n        help=\"File with BPE codes (created by learn_bpe).\")\n    parser.add_argument(\n        '--output', '-o', type=argparse.FileType('w'), default=sys.stdout,\n        metavar='PATH',\n        help=\"Output file (default: standard output)\")\n    parser.add_argument(\n        '--separator', '-s', type=str, default='@@', metavar='STR',\n        help=\"Separator between non-final subword units (default: '%(default)s'))\")\n    parser.add_argument(\n        
'--vocabulary', type=argparse.FileType('r'), default=None,\n        metavar=\"PATH\",\n        help=\"Vocabulary file (built with get_vocab.py). If provided, this script reverts any merge operations that produce an OOV.\")\n    parser.add_argument(\n        '--vocabulary-threshold', type=int, default=None,\n        metavar=\"INT\",\n        help=\"Vocabulary threshold. If vocabulary is provided, any word with frequency < threshold will be treated as OOV\")\n    parser.add_argument(\n        '--glossaries', type=str, nargs='+', default=None,\n        metavar=\"STR\",\n        help=\"Glossaries. The strings provided in glossaries will not be affected\"+\n             \"by the BPE (i.e. they will neither be broken into subwords, nor concatenated with other subwords\")\n\n    return parser\n\ndef get_pairs(word):\n    \"\"\"Return set of symbol pairs in a word.\n\n    word is represented as tuple of symbols (symbols being variable-length strings)\n    \"\"\"\n    pairs = set()\n    prev_char = word[0]\n    for char in word[1:]:\n        pairs.add((prev_char, char))\n        prev_char = char\n    return pairs\n\ndef encode(orig, bpe_codes, bpe_codes_reverse, vocab, separator, version, cache, glossaries=None):\n    \"\"\"Encode word based on list of BPE merge operations, which are applied consecutively\n    \"\"\"\n\n    if orig in cache:\n        return cache[orig]\n\n    if orig in glossaries:\n        cache[orig] = (orig,)\n        return (orig,)\n\n    if version == (0, 1):\n        word = tuple(orig) + ('</w>',)\n    elif version == (0, 2): # more consistent handling of word-final segments\n        word = tuple(orig[:-1]) + ( orig[-1] + '</w>',)\n    else:\n        raise NotImplementedError\n\n    pairs = get_pairs(word)\n\n    if not pairs:\n        return orig\n\n    while True:\n        bigram = min(pairs, key = lambda pair: bpe_codes.get(pair, float('inf')))\n        if bigram not in bpe_codes:\n            break\n        first, second = bigram\n        new_word 
= []\n        i = 0\n        while i < len(word):\n            try:\n                j = word.index(first, i)\n                new_word.extend(word[i:j])\n                i = j\n            except:\n                new_word.extend(word[i:])\n                break\n\n            if word[i] == first and i < len(word)-1 and word[i+1] == second:\n                new_word.append(first+second)\n                i += 2\n            else:\n                new_word.append(word[i])\n                i += 1\n        new_word = tuple(new_word)\n        word = new_word\n        if len(word) == 1:\n            break\n        else:\n            pairs = get_pairs(word)\n\n    # don't print end-of-word symbols\n    if word[-1] == '</w>':\n        word = word[:-1]\n    elif word[-1].endswith('</w>'):\n        word = word[:-1] + (word[-1].replace('</w>',''),)\n\n    if vocab:\n        word = check_vocab_and_split(word, bpe_codes_reverse, vocab, separator)\n\n    cache[orig] = word\n    return word\n\ndef recursive_split(segment, bpe_codes, vocab, separator, final=False):\n    \"\"\"Recursively split segment into smaller units (by reversing BPE merges)\n    until all units are either in-vocabulary, or cannot be split futher.\"\"\"\n\n    try:\n        if final:\n            left, right = bpe_codes[segment + '</w>']\n            right = right[:-4]\n        else:\n            left, right = bpe_codes[segment]\n    except:\n        #sys.stderr.write('cannot split {0} further.\\n'.format(segment))\n        yield segment\n        return\n\n    if left + separator in vocab:\n        yield left\n    else:\n        for item in recursive_split(left, bpe_codes, vocab, separator, False):\n            yield item\n\n    if (final and right in vocab) or (not final and right + separator in vocab):\n        yield right\n    else:\n        for item in recursive_split(right, bpe_codes, vocab, separator, final):\n            yield item\n\ndef check_vocab_and_split(orig, bpe_codes, vocab, separator):\n    
\"\"\"Check for each segment in word if it is in-vocabulary,\n    and segment OOV segments into smaller units by reversing the BPE merge operations\"\"\"\n\n    out = []\n\n    for segment in orig[:-1]:\n        if segment + separator in vocab:\n            out.append(segment)\n        else:\n            #sys.stderr.write('OOV: {0}\\n'.format(segment))\n            for item in recursive_split(segment, bpe_codes, vocab, separator, False):\n                out.append(item)\n\n    segment = orig[-1]\n    if segment in vocab:\n        out.append(segment)\n    else:\n        #sys.stderr.write('OOV: {0}\\n'.format(segment))\n        for item in recursive_split(segment, bpe_codes, vocab, separator, True):\n            out.append(item)\n\n    return out\n\n\ndef read_vocabulary(vocab_file, threshold):\n    \"\"\"read vocabulary file produced by get_vocab.py, and filter according to frequency threshold.\n    \"\"\"\n\n    vocabulary = set()\n\n    for line in vocab_file:\n        word, freq = line.split()\n        freq = int(freq)\n        if threshold == None or freq >= threshold:\n            vocabulary.add(word)\n\n    return vocabulary\n\ndef isolate_glossary(word, glossary):\n    \"\"\"\n    Isolate a glossary present inside a word.\n\n    Returns a list of subwords. 
All occurrences of 'glossary' are isolated
  },
  {
    "path": "texar_repo/bin/utils/learn_bpe",
    "content": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n# Author: Rico Sennrich\n# flake8: noqa\n\n\"\"\"Use byte pair encoding (BPE) to learn a variable-length encoding of the vocabulary in a text.\nUnlike the original BPE, it does not compress the plain text, but can be used to reduce the vocabulary\nof a text to a configurable number of symbols, with only a small increase in the number of tokens.\n\nReference:\nRico Sennrich, Barry Haddow and Alexandra Birch (2016). Neural Machine Translation of Rare Words with Subword Units.\nProceedings of the 54th Annual Meeting of the Association for Computational Linguistics (ACL 2016). Berlin, Germany.\n\"\"\"\n# This file is retrieved from https://github.com/rsennrich/subword-nmt\n\nfrom __future__ import unicode_literals\n\nimport sys\nimport codecs\nimport re\nimport copy\nimport argparse\nfrom collections import defaultdict, Counter\n\n# hack for python2/3 compatibility\nfrom io import open\nargparse.open = open\n\ndef create_parser():\n    parser = argparse.ArgumentParser(\n        formatter_class=argparse.RawDescriptionHelpFormatter,\n        description=\"learn BPE-based word segmentation\")\n\n    parser.add_argument(\n        '--input', '-i', type=argparse.FileType('r'), default=sys.stdin,\n        metavar='PATH',\n        help=\"Input text (default: standard input).\")\n\n    parser.add_argument(\n        '--output', '-o', type=argparse.FileType('w'), default=sys.stdout,\n        metavar='PATH',\n        help=\"Output file for BPE codes (default: standard output)\")\n    parser.add_argument(\n        '--symbols', '-s', type=int, default=10000,\n        help=\"Create this many new symbols (each representing a character n-gram) (default: %(default)s))\")\n    parser.add_argument(\n        '--min-frequency', type=int, default=2, metavar='FREQ',\n        help='Stop if no symbol pair has frequency >= FREQ (default: %(default)s))')\n    parser.add_argument('--dict-input', action=\"store_true\",\n        
help=\"If set, input file is interpreted as a dictionary where each line contains a word-count pair\")\n    parser.add_argument(\n        '--verbose', '-v', action=\"store_true\",\n        help=\"verbose mode.\")\n\n    return parser\n\ndef get_vocabulary(fobj, is_dict=False):\n    \"\"\"Read text and return dictionary that encodes vocabulary\n    \"\"\"\n    vocab = Counter()\n    for line in fobj:\n        if is_dict:\n            word, count = line.strip().split()\n            vocab[word] = int(count)\n        else:\n            for word in line.split():\n                vocab[word] += 1\n    return vocab\n\ndef update_pair_statistics(pair, changed, stats, indices):\n    \"\"\"Minimally update the indices and frequency of symbol pairs\n\n    if we merge a pair of symbols, only pairs that overlap with occurrences\n    of this pair are affected, and need to be updated.\n    \"\"\"\n    stats[pair] = 0\n    indices[pair] = defaultdict(int)\n    first, second = pair\n    new_pair = first+second\n    for j, word, old_word, freq in changed:\n\n        # find all instances of pair, and update frequency/indices around it\n        i = 0\n        while True:\n            # find first symbol\n            try:\n                i = old_word.index(first, i)\n            except ValueError:\n                break\n            # if first symbol is followed by second symbol, we've found an occurrence of pair (old_word[i:i+2])\n            if i < len(old_word)-1 and old_word[i+1] == second:\n                # assuming a symbol sequence \"A B C\", if \"B C\" is merged, reduce the frequency of \"A B\"\n                if i:\n                    prev = old_word[i-1:i+1]\n                    stats[prev] -= freq\n                    indices[prev][j] -= 1\n                if i < len(old_word)-2:\n                    # assuming a symbol sequence \"A B C B\", if \"B C\" is merged, reduce the frequency of \"C B\".\n                    # however, skip this if the sequence is A B C B C, 
because the frequency of \"C B\" will be reduced by the previous code block\n                    if old_word[i+2] != first or i >= len(old_word)-3 or old_word[i+3] != second:\n                        nex = old_word[i+1:i+3]\n                        stats[nex] -= freq\n                        indices[nex][j] -= 1\n                i += 2\n            else:\n                i += 1\n\n        i = 0\n        while True:\n            try:\n                # find new pair\n                i = word.index(new_pair, i)\n            except ValueError:\n                break\n            # assuming a symbol sequence \"A BC D\", if \"B C\" is merged, increase the frequency of \"A BC\"\n            if i:\n                prev = word[i-1:i+1]\n                stats[prev] += freq\n                indices[prev][j] += 1\n            # assuming a symbol sequence \"A BC B\", if \"B C\" is merged, increase the frequency of \"BC B\"\n            # however, if the sequence is A BC BC, skip this step because the count of \"BC BC\" will be incremented by the previous code block\n            if i < len(word)-1 and word[i+1] != new_pair:\n                nex = word[i:i+2]\n                stats[nex] += freq\n                indices[nex][j] += 1\n            i += 1\n\n\ndef get_pair_statistics(vocab):\n    \"\"\"Count frequency of all symbol pairs, and create index\"\"\"\n\n    # data structure of pair frequencies\n    stats = defaultdict(int)\n\n    #index from pairs to words\n    indices = defaultdict(lambda: defaultdict(int))\n\n    for i, (word, freq) in enumerate(vocab):\n        prev_char = word[0]\n        for char in word[1:]:\n            stats[prev_char, char] += freq\n            indices[prev_char, char][i] += 1\n            prev_char = char\n\n    return stats, indices\n\n\ndef replace_pair(pair, vocab, indices):\n    \"\"\"Replace all occurrences of a symbol pair ('A', 'B') with a new symbol 'AB'\"\"\"\n    first, second = pair\n    pair_str = ''.join(pair)\n    pair_str = 
pair_str.replace('\\\\','\\\\\\\\')\n    changes = []\n    pattern = re.compile(r'(?<!\\S)' + re.escape(first + ' ' + second) + r'(?!\\S)')\n    if sys.version_info < (3, 0):\n        iterator = indices[pair].iteritems()\n    else:\n        iterator = indices[pair].items()\n    for j, freq in iterator:\n        if freq < 1:\n            continue\n        word, freq = vocab[j]\n        new_word = ' '.join(word)\n        new_word = pattern.sub(pair_str, new_word)\n        new_word = tuple(new_word.split())\n\n        vocab[j] = (new_word, freq)\n        changes.append((j, new_word, word, freq))\n\n    return changes\n\ndef prune_stats(stats, big_stats, threshold):\n    \"\"\"Prune statistics dict for efficiency of max()\n\n    The frequency of a symbol pair never increases, so pruning is generally safe\n    (until the most frequent pair is less frequent than a pair we previously pruned)\n    big_stats keeps full statistics for when we need to access pruned items\n    \"\"\"\n    for item,freq in list(stats.items()):\n        if freq < threshold:\n            del stats[item]\n            if freq < 0:\n                big_stats[item] += freq\n            else:\n                big_stats[item] = freq\n\n\ndef main(infile, outfile, num_symbols, min_frequency=2, verbose=False, is_dict=False):\n    \"\"\"Learn num_symbols BPE operations from vocabulary, and write to outfile.\n    \"\"\"\n\n    # version 0.2 changes the handling of the end-of-word token ('</w>');\n    # version numbering allows backward compatibility\n    outfile.write('#version: 0.2\\n')\n\n    vocab = get_vocabulary(infile, is_dict)\n    vocab = dict([(tuple(x[:-1])+(x[-1]+'</w>',) ,y) for (x,y) in vocab.items()])\n    sorted_vocab = sorted(vocab.items(), key=lambda x: x[1], reverse=True)\n\n    stats, indices = get_pair_statistics(sorted_vocab)\n    big_stats = copy.deepcopy(stats)\n    # threshold is inspired by Zipfian assumption, but should only affect speed\n    threshold = max(stats.values()) / 
10\n    for i in range(num_symbols):\n        if stats:\n            most_frequent = max(stats, key=lambda x: (stats[x], x))\n\n        # we probably missed the best pair because of pruning; go back to full statistics\n        if not stats or (i and stats[most_frequent] < threshold):\n            prune_stats(stats, big_stats, threshold)\n            stats = copy.deepcopy(big_stats)\n            most_frequent = max(stats, key=lambda x: (stats[x], x))\n            # threshold is inspired by Zipfian assumption, but should only affect speed\n            threshold = stats[most_frequent] * i/(i+10000.0)\n            prune_stats(stats, big_stats, threshold)\n\n        if stats[most_frequent] < min_frequency:\n            sys.stderr.write('no pair has frequency >= {0}. Stopping\\n'.format(min_frequency))\n            break\n\n        if verbose:\n            sys.stderr.write('pair {0}: {1} {2} -> {1}{2} (frequency {3})\\n'.format(i, most_frequent[0], most_frequent[1], stats[most_frequent]))\n        outfile.write('{0} {1}\\n'.format(*most_frequent))\n        changes = replace_pair(most_frequent, sorted_vocab, indices)\n        update_pair_statistics(most_frequent, changes, stats, indices)\n        stats[most_frequent] = 0\n        if not i % 100:\n            prune_stats(stats, big_stats, threshold)\n\n\nif __name__ == '__main__':\n\n    # python 2/3 compatibility\n    if sys.version_info < (3, 0):\n        sys.stderr = codecs.getwriter('UTF-8')(sys.stderr)\n        sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)\n        sys.stdin = codecs.getreader('UTF-8')(sys.stdin)\n    else:\n        sys.stderr = codecs.getwriter('UTF-8')(sys.stderr.buffer)\n        sys.stdout = codecs.getwriter('UTF-8')(sys.stdout.buffer)\n        sys.stdin = codecs.getreader('UTF-8')(sys.stdin.buffer)\n\n    parser = create_parser()\n    args = parser.parse_args()\n\n    # read/write files as UTF-8\n    if args.input.name != '<stdin>':\n        args.input = codecs.open(args.input.name, 
encoding='utf-8')\n    if args.output.name != '<stdout>':\n        args.output = codecs.open(args.output.name, 'w', encoding='utf-8')\n\n    main(args.input, args.output, args.symbols, args.min_frequency, args.verbose, is_dict=args.dict_input)\n"
  },
  {
    "path": "texar_repo/bin/utils/make_vocab.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Creates vocabulary from a set of data files.\n\nExample usage:\n\n$ python make_vocab.py --files './data/train*'\n\nNote that if the file path is a pattern, wrap it with quotation masks.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\n# pylint: disable=invalid-name\n\nimport sys\nfrom io import open\n\nimport tensorflow as tf\n\nimport texar as tx\n\nPy3 = sys.version_info[0] == 3\n\nflags = tf.flags\n\nflags.DEFINE_string(\"files\", \"./train.txt\",\n                    \"Path to the data files. Can be a pattern, e.g., \"\n                    \"'/path/to/train*', '/path/to/train[12]'. \"\n                    \"Caution: If the path is a pattern, you must wrap the path \"\n                    \"with quotation marks. e.g., \"\n                    \"python make_vocab.py --files './data/*'\")\nflags.DEFINE_integer(\"max_vocab_size\", -1,\n                     \"Maximum size of the vocabulary. Low frequency words \"\n                     \"that exceeding the limit will be discarded. \"\n                     \"Set to `-1` if no truncation is wanted.\")\nflags.DEFINE_boolean(\"count\", False, \"Whether to print word count in the \"\n                     \"output file. 
Note that Texar data modules require a \"\n                     \"vocab file without word count. But the functionality \"\n                     \"can be useful to decide vocab truncation.\")\nflags.DEFINE_string(\"output_path\", \"./vocab.txt\",\n                    \"Path of the output vocab file.\")\nflags.DEFINE_string(\"newline_token\", None,\n                    \"The token to replace the original newline token '\\n'. \"\n                    \"For example, `--newline_token '<EOS>'`. If not \"\n                    \"specified, no replacement is performed.\")\n\nFLAGS = flags.FLAGS\n\n\ndef main(_):\n    \"\"\"Makes vocab.\n    \"\"\"\n    filenames = tx.utils.get_files(FLAGS.files)\n\n    if FLAGS.count:\n        vocab, count = tx.data.make_vocab(\n            filenames,\n            max_vocab_size=FLAGS.max_vocab_size,\n            newline_token=FLAGS.newline_token,\n            return_count=True)\n\n        with open(FLAGS.output_path, \"w\", encoding=\"utf-8\") as fout:\n            for v, c in zip(vocab, count):\n                fout.write('{}\\t{}\\n'.format(v, c))\n    else:\n        vocab = tx.data.make_vocab(\n            filenames,\n            max_vocab_size=FLAGS.max_vocab_size,\n            newline_token=FLAGS.newline_token)\n\n        with open(FLAGS.output_path, \"w\", encoding=\"utf-8\") as fout:\n            fout.write('\\n'.join(vocab))\n\n\nif __name__ == \"__main__\":\n    tf.app.run()\n"
  },
  {
    "path": "texar_repo/bin/utils/multi-bleu.perl",
    "content": "#!/usr/bin/env perl\n#\n# This file is part of moses.  Its use is licensed under the GNU Lesser General\n# Public License version 2.1 or, at your option, any later version.\n\n# $Id$\nuse warnings;\nuse strict;\n\nmy $lowercase = 0;\nif ($ARGV[0] eq \"-lc\") {\n  $lowercase = 1;\n  shift;\n}\n\nmy $stem = $ARGV[0];\nif (!defined $stem) {\n  print STDERR \"usage: multi-bleu.pl [-lc] reference < hypothesis\\n\";\n  print STDERR \"Reads the references from reference or reference0, reference1, ...\\n\";\n  exit(1);\n}\n\n$stem .= \".ref\" if !-e $stem && !-e $stem.\"0\" && -e $stem.\".ref0\";\n\nmy @REF;\nmy $ref=0;\nwhile(-e \"$stem$ref\") {\n    &add_to_ref(\"$stem$ref\",\\@REF);\n    $ref++;\n}\n&add_to_ref($stem,\\@REF) if -e $stem;\ndie(\"ERROR: could not find reference file $stem\") unless scalar @REF;\n\n# add additional references explicitly specified on the command line\nshift;\nforeach my $stem (@ARGV) {\n    &add_to_ref($stem,\\@REF) if -e $stem;\n}\n\n\n\nsub add_to_ref {\n    my ($file,$REF) = @_;\n    my $s=0;\n    if ($file =~ /.gz$/) {\n\topen(REF,\"gzip -dc $file|\") or die \"Can't read $file\";\n    } else { \n\topen(REF,$file) or die \"Can't read $file\";\n    }\n    while(<REF>) {\n\tchop;\n\tpush @{$$REF[$s++]}, $_;\n    }\n    close(REF);\n}\n\nmy(@CORRECT,@TOTAL,$length_translation,$length_reference);\nmy $s=0;\nwhile(<STDIN>) {\n    chop;\n    $_ = lc if $lowercase;\n    my @WORD = split;\n    my %REF_NGRAM = ();\n    my $length_translation_this_sentence = scalar(@WORD);\n    my ($closest_diff,$closest_length) = (9999,9999);\n    foreach my $reference (@{$REF[$s]}) {\n#      print \"$s $_ <=> $reference\\n\";\n  $reference = lc($reference) if $lowercase;\n\tmy @WORD = split(' ',$reference);\n\tmy $length = scalar(@WORD);\n        my $diff = abs($length_translation_this_sentence-$length);\n\tif ($diff < $closest_diff) {\n\t    $closest_diff = $diff;\n\t    $closest_length = $length;\n\t    # print STDERR \"$s: closest diff 
\".abs($length_translation_this_sentence-$length).\" = abs($length_translation_this_sentence-$length), setting len: $closest_length\\n\";\n\t} elsif ($diff == $closest_diff) {\n            $closest_length = $length if $length < $closest_length;\n            # from two references with the same closeness to me\n            # take the *shorter* into account, not the \"first\" one.\n        }\n\tfor(my $n=1;$n<=4;$n++) {\n\t    my %REF_NGRAM_N = ();\n\t    for(my $start=0;$start<=$#WORD-($n-1);$start++) {\n\t\tmy $ngram = \"$n\";\n\t\tfor(my $w=0;$w<$n;$w++) {\n\t\t    $ngram .= \" \".$WORD[$start+$w];\n\t\t}\n\t\t$REF_NGRAM_N{$ngram}++;\n\t    }\n\t    foreach my $ngram (keys %REF_NGRAM_N) {\n\t\tif (!defined($REF_NGRAM{$ngram}) ||\n\t\t    $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {\n\t\t    $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};\n#\t    print \"$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}<BR>\\n\";\n\t\t}\n\t    }\n\t}\n    }\n    $length_translation += $length_translation_this_sentence;\n    $length_reference += $closest_length;\n    for(my $n=1;$n<=4;$n++) {\n\tmy %T_NGRAM = ();\n\tfor(my $start=0;$start<=$#WORD-($n-1);$start++) {\n\t    my $ngram = \"$n\";\n\t    for(my $w=0;$w<$n;$w++) {\n\t\t$ngram .= \" \".$WORD[$start+$w];\n\t    }\n\t    $T_NGRAM{$ngram}++;\n\t}\n\tforeach my $ngram (keys %T_NGRAM) {\n\t    $ngram =~ /^(\\d+) /;\n\t    my $n = $1;\n            # my $corr = 0;\n#\tprint \"$i e $ngram $T_NGRAM{$ngram}<BR>\\n\";\n\t    $TOTAL[$n] += $T_NGRAM{$ngram};\n\t    if (defined($REF_NGRAM{$ngram})) {\n\t\tif ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {\n\t\t    $CORRECT[$n] += $T_NGRAM{$ngram};\n                    # $corr =  $T_NGRAM{$ngram};\n#\t    print \"$i e correct1 $T_NGRAM{$ngram}<BR>\\n\";\n\t\t}\n\t\telse {\n\t\t    $CORRECT[$n] += $REF_NGRAM{$ngram};\n                    # $corr =  $REF_NGRAM{$ngram};\n#\t    print \"$i e correct2 $REF_NGRAM{$ngram}<BR>\\n\";\n\t\t}\n\t    }\n            # $REF_NGRAM{$ngram} = 0 if !defined 
$REF_NGRAM{$ngram};\n            # print STDERR \"$ngram: {$s, $REF_NGRAM{$ngram}, $T_NGRAM{$ngram}, $corr}\\n\"\n\t}\n    }\n    $s++;\n}\nmy $brevity_penalty = 1;\nmy $bleu = 0;\n\nmy @bleu=();\n\nfor(my $n=1;$n<=4;$n++) {\n  if (defined ($TOTAL[$n])){\n    $bleu[$n]=($TOTAL[$n])?$CORRECT[$n]/$TOTAL[$n]:0;\n    # print STDERR \"CORRECT[$n]:$CORRECT[$n] TOTAL[$n]:$TOTAL[$n]\\n\";\n  }else{\n    $bleu[$n]=0;\n  }\n}\n\nif ($length_reference==0){\n  printf \"BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\\n\";\n  exit(1);\n}\n\nif ($length_translation<$length_reference) {\n  $brevity_penalty = exp(1-$length_reference/$length_translation);\n}\n$bleu = $brevity_penalty * exp((my_log( $bleu[1] ) +\n\t\t\t\tmy_log( $bleu[2] ) +\n\t\t\t\tmy_log( $bleu[3] ) +\n\t\t\t\tmy_log( $bleu[4] ) ) / 4) ;\nprintf \"BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)\\n\",\n    100*$bleu,\n    100*$bleu[1],\n    100*$bleu[2],\n    100*$bleu[3],\n    100*$bleu[4],\n    $brevity_penalty,\n    $length_translation / $length_reference,\n    $length_translation,\n    $length_reference;\n\nsub my_log {\n  return -9999999999 unless $_[0];\n  return log($_[0]);\n}\n"
  },
  {
    "path": "texar_repo/bin/utils/spm_decode",
    "content": "#!/usr/bin/env python\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport sys\nfrom argparse import ArgumentParser\nimport sentencepiece as spm\nfrom io import open\n\nparser = ArgumentParser(description='SentencePiece Train')\nparser.add_argument('--input_format', type=str)\nparser.add_argument('--model', type=str)\nparser.add_argument('--infile', type=str)\nparser.add_argument('--outfile', type=str)\nargs = parser.parse_args()\n\nsp = spm.SentencePieceProcessor()\nsp.Load(\"{}\".format(args.model))\n\nmap_func = None\nif args.input_format == 'piece':\n    func = sp.DecodePieces\nelse:\n    func = sp.DecodeIds\n    map_func = int\n\nwith open(args.infile, encoding='utf-8') as infile, \\\n    open(args.outfile, 'w+', encoding='utf-8') as outfile:\n    for line in infile.readlines():\n        line = line.strip().split()\n        if map_func:\n            line = list(map(map_func, line))\n        outfile.write('{}\\n'.format(func(line)))\n"
  },
  {
    "path": "texar_repo/bin/utils/spm_encode",
    "content": "#!/usr/bin/env python\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport sys\nfrom io import open\nfrom argparse import ArgumentParser\nimport sentencepiece as spm\n\nPy3 = sys.version_info[0] == 3\n\nparser = ArgumentParser(description='SentencePiece Train')\nparser.add_argument('--output_format', type=str)\nparser.add_argument('--model', type=str)\nparser.add_argument('--infile', type=str)\nparser.add_argument('--outfile', type=str)\nargs = parser.parse_args()\n\nsp = spm.SentencePieceProcessor()\nsp.Load(\"{}\".format(args.model))\n\nif args.output_format == 'piece':\n    func = sp.EncodeAsPieces\nelse:\n    func = sp.EncodeAsIds\n\nwith open(args.infile, encoding='utf-8') as infile, \\\n    open(args.outfile, 'w+', encoding='utf-8') as outfile:\n    for line in infile.readlines():\n        line = line.strip()\n        if Py3:\n            encoded = map(str, func(line))\n        else:\n            encoded = map(unicode, func(line))\n        outfile.write('{}\\n'.format(' '.join(encoded)))\n"
  },
  {
    "path": "texar_repo/bin/utils/spm_train",
    "content": "#!/usr/bin/env python\n\nfrom argparse import ArgumentParser\nimport sentencepiece as spm\n\nparser = ArgumentParser(description='SentencePiece Train')\nparser.add_argument('--input', type=str)\nparser.add_argument('--vocab_size', type=str)\nparser.add_argument('--model_prefix', type=str)\nargs = parser.parse_args()\n\nspm.SentencePieceTrainer.Train('--input={} --model_prefix={} --vocab_size={}'.format(args.input,\n                                                                                     args.model_prefix,\n                                                                                     args.vocab_size))\nprint(args)\n"
  },
  {
    "path": "texar_repo/config.py",
    "content": "import texar as tx\ndcoder_config = {\n    'dim': 768,\n    'num_blocks': 6,\n    'multihead_attention': {\n        'num_heads': 8,\n        'output_dim': 768\n        # See documentation for more optional hyperparameters\n    },\n    'position_embedder_hparams': {\n        'dim': 768\n    },\n    'initializer': {\n        'type': 'variance_scaling_initializer',\n        'kwargs': {\n            'scale': 1.0,\n            'mode': 'fan_avg',\n            'distribution': 'uniform',\n        },\n    },\n    'poswise_feedforward': tx.modules.default_transformer_poswise_net_hparams(\n        output_dim=768)\n}\n\nloss_label_confidence = 0.9\n\nrandom_seed = 1234\nbeam_width = 5\nalpha = 0.6\nhidden_dim = 768\n\n\nopt = {\n    'optimizer': {\n        'type': 'AdamOptimizer',\n        'kwargs': {\n            'beta1': 0.9,\n            'beta2': 0.997,\n            'epsilon': 1e-9\n        }\n    }\n}\n\n\n#warmup steps must be 0.1% of number of iterations\nlr = {\n    'learning_rate_schedule': 'constant.linear_warmup.rsqrt_decay.rsqrt_depth',\n    'lr_constant': 2 * (hidden_dim ** -0.5),\n    'static_lr': 1e-3,\n    'warmup_steps': 10000,\n}\n\nbos_token_id =101\neos_token_id = 102\n\nmodel_dir= \"./models\"\nrun_mode= \"train_and_evaluate\"\nbatch_size = 32\ntest_batch_size = 32\n\nmax_train_steps = 100000\n\ndisplay_steps = 100\neval_steps = 100000\n\nmax_decoding_length = 400\n\nmax_seq_length_src = 512\nmax_seq_length_tgt = 400\n\ntrain_file = \"data/train.tf_reccord\"\neval_file = \"data/eval.tf_record\"\n\nbert_pretrain_dir=\"./bert_uncased_model\"\n"
  },
  {
    "path": "texar_repo/docs/Makefile",
    "content": "# Makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line.\nSPHINXOPTS    =\nSPHINXBUILD   = sphinx-build\nPAPER         =\nBUILDDIR      = _build\n\n# User-friendly check for sphinx-build\nifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)\n\t$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\\'t have Sphinx installed, grab it from http://sphinx-doc.org/)\nendif\n\n# Internal variables.\nPAPEROPT_a4     = -D latex_paper_size=a4\nPAPEROPT_letter = -D latex_paper_size=letter\nALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .\n# the i18n builder cannot share the environment and doctrees with the others\nI18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .\n\n.PHONY: help\nhelp:\n\t@echo \"Please use \\`make <target>' where <target> is one of\"\n\t@echo \"  html       to make standalone HTML files\"\n\t@echo \"  dirhtml    to make HTML files named index.html in directories\"\n\t@echo \"  singlehtml to make a single large HTML file\"\n\t@echo \"  pickle     to make pickle files\"\n\t@echo \"  json       to make JSON files\"\n\t@echo \"  htmlhelp   to make HTML files and a HTML help project\"\n\t@echo \"  qthelp     to make HTML files and a qthelp project\"\n\t@echo \"  applehelp  to make an Apple Help Book\"\n\t@echo \"  devhelp    to make HTML files and a Devhelp project\"\n\t@echo \"  epub       to make an epub\"\n\t@echo \"  epub3      to make an epub3\"\n\t@echo \"  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter\"\n\t@echo \"  latexpdf   to make LaTeX files and run them through pdflatex\"\n\t@echo \"  latexpdfja to make LaTeX files and run them through platex/dvipdfmx\"\n\t@echo \"  text       to make text 
files\"\n\t@echo \"  man        to make manual pages\"\n\t@echo \"  texinfo    to make Texinfo files\"\n\t@echo \"  info       to make Texinfo files and run them through makeinfo\"\n\t@echo \"  gettext    to make PO message catalogs\"\n\t@echo \"  changes    to make an overview of all changed/added/deprecated items\"\n\t@echo \"  xml        to make Docutils-native XML files\"\n\t@echo \"  pseudoxml  to make pseudoxml-XML files for display purposes\"\n\t@echo \"  linkcheck  to check all external links for integrity\"\n\t@echo \"  doctest    to run all doctests embedded in the documentation (if enabled)\"\n\t@echo \"  coverage   to run coverage check of the documentation (if enabled)\"\n\t@echo \"  dummy      to check syntax errors of document sources\"\n\n.PHONY: clean\nclean:\n\trm -rf $(BUILDDIR)/*\n\n.PHONY: html\nhtml:\n\t$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html\n\t@echo\n\t@echo \"Build finished. The HTML pages are in $(BUILDDIR)/html.\"\n\n.PHONY: dirhtml\ndirhtml:\n\t$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml\n\t@echo\n\t@echo \"Build finished. The HTML pages are in $(BUILDDIR)/dirhtml.\"\n\n.PHONY: singlehtml\nsinglehtml:\n\t$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml\n\t@echo\n\t@echo \"Build finished. 
The HTML page is in $(BUILDDIR)/singlehtml.\"\n\n.PHONY: pickle\npickle:\n\t$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle\n\t@echo\n\t@echo \"Build finished; now you can process the pickle files.\"\n\n.PHONY: json\njson:\n\t$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json\n\t@echo\n\t@echo \"Build finished; now you can process the JSON files.\"\n\n.PHONY: htmlhelp\nhtmlhelp:\n\t$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp\n\t@echo\n\t@echo \"Build finished; now you can run HTML Help Workshop with the\" \\\n\t      \".hhp project file in $(BUILDDIR)/htmlhelp.\"\n\n.PHONY: qthelp\nqthelp:\n\t$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp\n\t@echo\n\t@echo \"Build finished; now you can run \"qcollectiongenerator\" with the\" \\\n\t      \".qhcp project file in $(BUILDDIR)/qthelp, like this:\"\n\t@echo \"# qcollectiongenerator $(BUILDDIR)/qthelp/texar.qhcp\"\n\t@echo \"To view the help file:\"\n\t@echo \"# assistant -collectionFile $(BUILDDIR)/qthelp/texar.qhc\"\n\n.PHONY: applehelp\napplehelp:\n\t$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp\n\t@echo\n\t@echo \"Build finished. The help book is in $(BUILDDIR)/applehelp.\"\n\t@echo \"N.B. You won't be able to view it unless you put it in\" \\\n\t      \"~/Library/Documentation/Help or install it in your application\" \\\n\t      \"bundle.\"\n\n.PHONY: devhelp\ndevhelp:\n\t$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp\n\t@echo\n\t@echo \"Build finished.\"\n\t@echo \"To view the help file:\"\n\t@echo \"# mkdir -p $$HOME/.local/share/devhelp/texar\"\n\t@echo \"# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/texar\"\n\t@echo \"# devhelp\"\n\n.PHONY: epub\nepub:\n\t$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub\n\t@echo\n\t@echo \"Build finished. The epub file is in $(BUILDDIR)/epub.\"\n\n.PHONY: epub3\nepub3:\n\t$(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3\n\t@echo\n\t@echo \"Build finished. 
The epub3 file is in $(BUILDDIR)/epub3.\"\n\n.PHONY: latex\nlatex:\n\t$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex\n\t@echo\n\t@echo \"Build finished; the LaTeX files are in $(BUILDDIR)/latex.\"\n\t@echo \"Run \\`make' in that directory to run these through (pdf)latex\" \\\n\t      \"(use \\`make latexpdf' here to do that automatically).\"\n\n.PHONY: latexpdf\nlatexpdf:\n\t$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex\n\t@echo \"Running LaTeX files through pdflatex...\"\n\t$(MAKE) -C $(BUILDDIR)/latex all-pdf\n\t@echo \"pdflatex finished; the PDF files are in $(BUILDDIR)/latex.\"\n\n.PHONY: latexpdfja\nlatexpdfja:\n\t$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex\n\t@echo \"Running LaTeX files through platex and dvipdfmx...\"\n\t$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja\n\t@echo \"pdflatex finished; the PDF files are in $(BUILDDIR)/latex.\"\n\n.PHONY: text\ntext:\n\t$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text\n\t@echo\n\t@echo \"Build finished. The text files are in $(BUILDDIR)/text.\"\n\n.PHONY: man\nman:\n\t$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man\n\t@echo\n\t@echo \"Build finished. The manual pages are in $(BUILDDIR)/man.\"\n\n.PHONY: texinfo\ntexinfo:\n\t$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo\n\t@echo\n\t@echo \"Build finished. The Texinfo files are in $(BUILDDIR)/texinfo.\"\n\t@echo \"Run \\`make' in that directory to run these through makeinfo\" \\\n\t      \"(use \\`make info' here to do that automatically).\"\n\n.PHONY: info\ninfo:\n\t$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo\n\t@echo \"Running Texinfo files through makeinfo...\"\n\tmake -C $(BUILDDIR)/texinfo info\n\t@echo \"makeinfo finished; the Info files are in $(BUILDDIR)/texinfo.\"\n\n.PHONY: gettext\ngettext:\n\t$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale\n\t@echo\n\t@echo \"Build finished. 
The message catalogs are in $(BUILDDIR)/locale.\"\n\n.PHONY: changes\nchanges:\n\t$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes\n\t@echo\n\t@echo \"The overview file is in $(BUILDDIR)/changes.\"\n\n.PHONY: linkcheck\nlinkcheck:\n\t$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck\n\t@echo\n\t@echo \"Link check complete; look for any errors in the above output \" \\\n\t      \"or in $(BUILDDIR)/linkcheck/output.txt.\"\n\n.PHONY: doctest\ndoctest:\n\t$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest\n\t@echo \"Testing of doctests in the sources finished, look at the \" \\\n\t      \"results in $(BUILDDIR)/doctest/output.txt.\"\n\n.PHONY: coverage\ncoverage:\n\t$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage\n\t@echo \"Testing of coverage in the sources finished, look at the \" \\\n\t      \"results in $(BUILDDIR)/coverage/python.txt.\"\n\n.PHONY: xml\nxml:\n\t$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml\n\t@echo\n\t@echo \"Build finished. The XML files are in $(BUILDDIR)/xml.\"\n\n.PHONY: pseudoxml\npseudoxml:\n\t$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml\n\t@echo\n\t@echo \"Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml.\"\n\n.PHONY: dummy\ndummy:\n\t$(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy\n\t@echo\n\t@echo \"Build finished. Dummy builder generates no files.\"\n"
  },
  {
    "path": "texar_repo/docs/_static/css/custom_theme.css",
    "content": "/* This style sheet is heavily inspired by PyTorch docs . */\n/* https://github.com/pytorch/pytorch/blob/master/docs/source/_static/css/pytorch_theme.css */\n\nbody {\n    font-family: \"Lato\",\"proxima-nova\",\"Helvetica Neue\",Arial,sans-serif;\n}\n\nh1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption {\n    font-family: \"Lato\",\"proxima-nova\",\"Helvetica Neue\",Arial,sans-serif;\n}\n\n/* Literal color */\n.rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal {\n    color: #DB2407;\n}\n\n/* Docs top-left background color */\n.wy-side-nav-search {\n    background-color: #fff;\n}\n\n/* Fixes for mobile */\n.wy-nav-top {\n    background-color: #fff;\n    background-image: url('../img/logo_h.png');\n    background-repeat: no-repeat;\n    background-position: center;\n    padding: 0;\n    margin: 0.6em 1em;\n}\n\n.wy-nav-top > a { \n    display: none;\n}\n\n@media screen and (max-width: 768px) {\n    .wy-side-nav-search>a img.logo {\n        height: 60px;\n    }   \n}\n\n/* This is needed to ensure that logo above search scales properly */\n.wy-side-nav-search a {\n    display: block;\n}\n\n.wy-side-nav-search>div.version {\n    color: #000;\n}\n\n/* For hidden headers that appear in TOC tree */\n/* see http://stackoverflow.com/a/32363545/3343043 */\n.rst-content .hidden-section {\n    display: none;\n}\n\nnav .hidden-section {\n    display: inherit;\n"
  },
  {
    "path": "texar_repo/docs/code/agents.rst",
    "content": ".. role:: hidden\n    :class: hidden-section\n\nAgents\n*******\n\n\nSequence Agents\n=================\n\n:hidden:`SeqPGAgent`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.agents.SeqPGAgent\n    :members:\n    :inherited-members:\n\n\nEpisodic Agents\n=================\n\n:hidden:`EpisodicAgentBase`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.agents.EpisodicAgentBase\n    :members:\n    :inherited-members:\n\n:hidden:`PGAgent`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.agents.PGAgent\n    :members:\n    :inherited-members:\n\n:hidden:`DQNAgent`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.agents.DQNAgent\n    :members:\n    :inherited-members:\n\n:hidden:`ActorCriticAgent`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.agents.ActorCriticAgent\n    :members:\n    :inherited-members:\n\nAgent Utils\n============\n\n:hidden:`Space`\n~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.agents.Space\n    :members:\n\n:hidden:`EnvConfig`\n~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.agents.EnvConfig\n    :members:\n\n:hidden:`convert_gym_space`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.agents.convert_gym_space\n\n:hidden:`get_gym_env_config`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.agents.get_gym_env_config\n"
  },
  {
    "path": "texar_repo/docs/code/context.rst",
    "content": ".. role:: hidden\n    :class: hidden-section\n\nContext\n********\n\nGlobal Mode\n===========\n\n:hidden:`global_mode`\n~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.global_mode\n\n:hidden:`global_mode_train`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.global_mode_train\n\n:hidden:`global_mode_eval`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.global_mode_eval\n\n:hidden:`global_mode_predict`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.global_mode_predict\n\n:hidden:`valid_modes`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.valid_modes\n"
  },
  {
    "path": "texar_repo/docs/code/core.rst",
    "content": ".. role:: hidden\n    :class: hidden-section\n\nCore\n****\n\n\nCells\n=====\n\n:hidden:`default_rnn_cell_hparams`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.default_rnn_cell_hparams \n\n:hidden:`get_rnn_cell`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.get_rnn_cell\n\n:hidden:`get_rnn_cell_trainable_variables`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.get_rnn_cell_trainable_variables\n\nLayers\n======\n\n:hidden:`get_layer`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.get_layer\n\n:hidden:`MaxReducePooling1D`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.core.MaxReducePooling1D\n    :members:\n\n:hidden:`AverageReducePooling1D`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.core.AverageReducePooling1D\n    :members:\n\n:hidden:`get_pooling_layer_hparams`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.get_pooling_layer_hparams\n\n:hidden:`MergeLayer`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.core.MergeLayer\n    :members:\n\n:hidden:`SequentialLayer`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.core.SequentialLayer\n    :members:\n\n:hidden:`default_regularizer_hparams`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.default_regularizer_hparams\n\n:hidden:`get_regularizer`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.get_regularizer\n\n:hidden:`get_initializer`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.get_initializer\n\n:hidden:`get_activation_fn`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.get_activation_fn\n\n:hidden:`get_constraint_fn`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. 
autofunction:: texar.core.get_constraint_fn\n\n:hidden:`default_conv1d_kwargs`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.default_conv1d_kwargs\n\n:hidden:`default_dense_kwargs`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.default_dense_kwargs\n\n\nOptimization\n=============\n\n:hidden:`default_optimization_hparams`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.default_optimization_hparams\n\n:hidden:`get_train_op`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.get_train_op\n\n:hidden:`get_optimizer_fn`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.get_optimizer_fn\n\n:hidden:`get_optimizer`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.get_optimizer\n\n:hidden:`get_learning_rate_decay_fn`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.get_learning_rate_decay_fn\n\n:hidden:`get_gradient_clip_fn`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.core.get_gradient_clip_fn\n\n\nExploration\n============\n\n:hidden:`EpsilonLinearDecayExploration`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.core.EpsilonLinearDecayExploration\n    :members:\n\n:hidden:`ExplorationBase`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.core.ExplorationBase\n    :members:\n\nReplay Memories\n================\n\n:hidden:`DequeReplayMemory`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.core.DequeReplayMemory\n    :members:\n\n:hidden:`ReplayMemoryBase`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.core.ReplayMemoryBase\n    :members:\n"
  },
  {
    "path": "texar_repo/docs/code/data.rst",
    "content": ".. role:: hidden\n    :class: hidden-section\n\nData\n*******\n\nVocabulary\n==========\n\n:hidden:`SpecialTokens`\n~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.SpecialTokens\n    :members:\n\n:hidden:`Vocab`\n~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.Vocab\n    :members:\n\nEmbedding\n==========\n\n:hidden:`Embedding`\n~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.Embedding\n    :members:\n\n:hidden:`load_word2vec`\n~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.data.load_word2vec\n\n:hidden:`load_glove`\n~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autofunction:: texar.data.load_glove\n\nData\n==========\n\n:hidden:`DataBase`\n~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.DataBase\n    :members:\n\n:hidden:`MonoTextData`\n~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.MonoTextData\n    :members:\n    :inherited-members:\n    :exclude-members: make_vocab,make_embedding\n\n:hidden:`PairedTextData`\n~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.PairedTextData\n    :members:\n    :inherited-members:\n    :exclude-members: make_vocab,make_embedding\n\n:hidden:`ScalarData`\n~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.ScalarData\n    :members:\n    :inherited-members:\n\n:hidden:`MultiAlignedData`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.MultiAlignedData\n    :members:\n    :inherited-members:\n    :exclude-members: make_vocab,make_embedding\n\n:hidden:`TextDataBase`\n~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.TextDataBase\n    :members:\n\n\nData Iterators\n===============\n\n:hidden:`DataIteratorBase`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.DataIteratorBase\n    :members:\n\n:hidden:`DataIterator`\n~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.DataIterator\n    :members:\n\n:hidden:`TrainTestDataIterator`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. 
autoclass:: texar.data.TrainTestDataIterator\n    :members:\n\n\n:hidden:`FeedableDataIterator`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.FeedableDataIterator\n    :members:\n\n\n:hidden:`TrainTestFeedableDataIterator`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. autoclass:: texar.data.TrainTestFeedableDataIterator\n    :members:\n\nData Utils\n==========\n\n:hidden:`random_shard_dataset`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.data.random_shard_dataset\n\n:hidden:`maybe_tuple`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.data.maybe_tuple\n\n:hidden:`make_partial`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.data.make_partial\n\n:hidden:`maybe_download`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.data.maybe_download\n\n:hidden:`read_words`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.data.read_words\n\n:hidden:`make_vocab`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.data.make_vocab\n\n:hidden:`count_file_lines`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.data.count_file_lines\n\n:hidden:`make_chained_transformation`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.data.make_chained_transformation\n\n:hidden:`make_combined_transformation`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.data.make_combined_transformation\n"
  },
  {
    "path": "texar_repo/docs/code/evals.rst",
    "content": ".. role:: hidden\n    :class: hidden-section\n\nEvaluations\n***********\n\n\nBLEU\n==========\n\n:hidden:`sentence_bleu`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.evals.sentence_bleu\n\n:hidden:`corpus_bleu`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.evals.corpus_bleu\n\n:hidden:`sentence_bleu_moses`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.evals.sentence_bleu_moses\n\n:hidden:`corpus_bleu_moses`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.evals.corpus_bleu_moses\n\n\nAccuracy\n========\n\n:hidden:`accuracy`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.evals.accuracy\n\n\n:hidden:`binary_clas_accurac`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.evals.binary_clas_accuracy\n"
  },
  {
    "path": "texar_repo/docs/code/hyperparams.rst",
    "content": ".. role:: hidden\n    :class: hidden\n\nHParams\n*******\n\n.. autoclass:: texar.HParams\n    :members:\n"
  },
  {
    "path": "texar_repo/docs/code/losses.rst",
    "content": ".. role:: hidden\n    :class: hidden-section\n\nLoss Functions\n**************\n\nMLE Loss\n==========\n\n:hidden:`sequence_softmax_cross_entropy`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.sequence_softmax_cross_entropy\n\n:hidden:`sequence_sparse_softmax_cross_entropy`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.sequence_sparse_softmax_cross_entropy\n\n:hidden:`sequence_sigmoid_cross_entropy`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.sequence_sigmoid_cross_entropy\n\n:hidden:`binary_sigmoid_cross_entropy`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.binary_sigmoid_cross_entropy\n\n:hidden:`binary_sigmoid_cross_entropy_with_clas`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.binary_sigmoid_cross_entropy_with_clas\n\n\nPolicy Gradient Loss\n=====================\n\n:hidden:`pg_loss_with_logits`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.pg_loss_with_logits\n\n:hidden:`pg_loss_with_log_probs`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.pg_loss_with_log_probs\n\n\nReward\n=============\n\n:hidden:`discount_reward`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.discount_reward\n\n\nAdversarial Loss\n==================\n\n:hidden:`binary_adversarial_losses`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.binary_adversarial_losses\n\n\nEntropy\n========\n\n:hidden:`entropy_with_logits`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.entropy_with_logits\n\n:hidden:`sequence_entropy_with_logits`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. 
autofunction:: texar.losses.sequence_entropy_with_logits\n\n\nLoss Utils\n===========\n\n:hidden:`mask_and_reduce`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.mask_and_reduce\n\n:hidden:`reduce_batch_time`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.reduce_batch_time\n\n:hidden:`reduce_dimensions`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.losses.reduce_dimensions\n"
  },
  {
    "path": "texar_repo/docs/code/models.rst",
    "content": ".. role:: hidden\n    :class: hidden-section\n\nModels\n********\n\nModelBase\n=============\n\n.. autoclass:: texar.models.ModelBase\n    :members:\n\nSeq2seqBase\n===============\n\n.. autoclass:: texar.models.Seq2seqBase\n    :members:\n    :inherited-members:\n\nBasicSeq2seq\n==============\n\n.. autoclass:: texar.models.BasicSeq2seq\n    :members:\n    :inherited-members:\n"
  },
  {
    "path": "texar_repo/docs/code/modules.rst",
    "content": ".. role:: hidden\n    :class: hidden-section\n\nModules\n*******\n\nModuleBase\n===========\n\n.. autoclass:: texar.ModuleBase\n    :members:\n\nEmbedders\n=========\n\n:hidden:`WordEmbedder`\n~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.WordEmbedder\n    :members:\n\n:hidden:`PositionEmbedder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.PositionEmbedder\n    :members:\n\n:hidden:`SinusoidsPositionEmbedder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.SinusoidsPositionEmbedder\n    :members:\n\n:hidden:`EmbedderBase`\n~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.EmbedderBase\n    :members:\n\n\nEncoders\n========\n\n:hidden:`UnidirectionalRNNEncoder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.UnidirectionalRNNEncoder\n    :members:\n\n:hidden:`BidirectionalRNNEncoder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.BidirectionalRNNEncoder\n    :members:\n\n:hidden:`HierarchicalRNNEncoder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.HierarchicalRNNEncoder\n    :members:\n\n:hidden:`MultiheadAttentionEncoder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.MultiheadAttentionEncoder\n    :members:\n\n:hidden:`TransformerEncoder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.TransformerEncoder\n    :members:\n\n:hidden:`Conv1DEncoder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.Conv1DEncoder\n    :members:\n\n:hidden:`EncoderBase`\n~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.EncoderBase\n    :members:\n\n:hidden:`RNNEncoderBase`\n~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.RNNEncoderBase\n    :members:\n\n:hidden:`default_transformer_poswise_net_hparams`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. 
autofunction:: texar.modules.default_transformer_poswise_net_hparams\n\nDecoders\n========\n\n:hidden:`RNNDecoderBase`\n~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.RNNDecoderBase\n    :members:\n    :inherited-members:\n    :exclude-members: initialize,step,finalize,tracks_own_finished,output_size,output_dtype\n\n:hidden:`BasicRNNDecoder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.BasicRNNDecoder\n    :members:\n    :inherited-members:\n    :exclude-members: initialize,step,finalize,tracks_own_finished,output_size,output_dtype\n\n:hidden:`BasicRNNDecoderOutput`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.BasicRNNDecoderOutput\n    :members:\n\n:hidden:`AttentionRNNDecoder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.AttentionRNNDecoder\n    :members:\n    :inherited-members:\n    :exclude-members: initialize,step,finalize,tracks_own_finished,output_size,output_dtype\n\n:hidden:`AttentionRNNDecoderOutput`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.AttentionRNNDecoderOutput\n    :members:\n\n:hidden:`beam_search_decode`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.modules.beam_search_decode\n\n:hidden:`TransformerDecoder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.TransformerDecoder\n    :members:\n\n:hidden:`TransformerDecoderOutput`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.TransformerDecoderOutput\n    :members:\n\n:hidden:`SoftmaxEmbeddingHelper`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.SoftmaxEmbeddingHelper\n    :members:\n\n:hidden:`GumbelSoftmaxEmbeddingHelper`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.GumbelSoftmaxEmbeddingHelper\n    :members:\n\n:hidden:`get_helper`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. 
autofunction:: texar.modules.get_helper\n\n\nConnectors\n==========\n\n:hidden:`ConnectorBase`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.ConnectorBase\n    :members:\n    :inherited-members:\n\n:hidden:`ConstantConnector`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.ConstantConnector\n    :members:\n    :inherited-members:\n\n:hidden:`ForwardConnector`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.ForwardConnector\n    :members:\n    :inherited-members:\n\n:hidden:`MLPTransformConnector`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.MLPTransformConnector\n    :members:\n    :inherited-members:\n\n:hidden:`ReparameterizedStochasticConnector`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.ReparameterizedStochasticConnector\n    :members:\n    :inherited-members:\n\n:hidden:`StochasticConnector`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.StochasticConnector\n    :members:\n    :inherited-members:\n\n\nClassifiers\n============\n\n:hidden:`Conv1DClassifier`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.Conv1DClassifier\n    :members:\n    :inherited-members:\n\n:hidden:`UnidirectionalRNNClassifier`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.UnidirectionalRNNClassifier\n    :members:\n    :inherited-members:\n\nNetworks\n========\n\n:hidden:`FeedForwardNetworkBase`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.FeedForwardNetworkBase\n    :members:\n    :inherited-members:\n\n:hidden:`FeedForwardNetwork`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.FeedForwardNetwork\n    :members:\n    :inherited-members:\n\n:hidden:`Conv1DNetwork`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.Conv1DNetwork\n    :members:\n    :inherited-members:\n\nMemory\n======\n\n:hidden:`MemNetBase`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. 
autoclass:: texar.modules.MemNetBase\n    :members:\n    :inherited-members:\n\n:hidden:`MemNetRNNLike`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.MemNetRNNLike\n    :members:\n    :inherited-members:\n    :exclude-members: get_default_embed_fn \n\n:hidden:`default_memnet_embed_fn_hparams`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.modules.default_memnet_embed_fn_hparams\n\nPolicy\n=========\n\n:hidden:`PolicyNetBase`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.PolicyNetBase\n    :members:\n    :inherited-members:\n\n:hidden:`CategoricalPolicyNet`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.CategoricalPolicyNet\n    :members:\n    :inherited-members:\n\nQ-Nets\n=========\n\n:hidden:`QNetBase`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.QNetBase\n    :members:\n    :inherited-members:\n\n:hidden:`CategoricalQNet`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.modules.CategoricalQNet\n    :members:\n    :inherited-members:\n"
  },
  {
    "path": "texar_repo/docs/code/run.rst",
    "content": ".. role:: hidden\n    :class: hidden-section\n\nExecutor\n********\n\n.. autoclass:: texar.run.Executor\n    :members:\n"
  },
  {
    "path": "texar_repo/docs/code/txtgen.rst",
    "content": "Texar\n******\n\n.. automodule:: texar \n"
  },
  {
    "path": "texar_repo/docs/code/utils.rst",
    "content": ".. role:: hidden\n    :class: hidden-section\n\nUtils\n**************\n\nFrequent Use\n============\n\n:hidden:`AverageRecorder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autoclass:: texar.utils.AverageRecorder\n    :members:\n\n:hidden:`collect_trainable_variables`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.collect_trainable_variables\n\n:hidden:`compat_as_text`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.compat_as_text\n\n:hidden:`map_ids_to_strs`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.map_ids_to_strs\n\n:hidden:`write_paired_text`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.write_paired_text\n\n:hidden:`straight_through`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.straight_through\n\n\nVariables\n=========\n\n:hidden:`collect_trainable_variables`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.collect_trainable_variables\n\n:hidden:`get_unique_named_variable_scope`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.get_unique_named_variable_scope\n\n:hidden:`add_variable`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.add_variable\n\n\nIO\n===\n\n:hidden:`write_paired_text`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.write_paired_text\n\n:hidden:`load_config`\n~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.load_config\n\n:hidden:`maybe_create_dir`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.maybe_create_dir\n\n:hidden:`get_files`\n~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.get_files\n\n\nDType\n=====\n\n:hidden:`compat_as_text`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.compat_as_text\n\n:hidden:`get_tf_dtype`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.get_tf_dtype\n\n:hidden:`is_callable`\n~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. 
autofunction:: texar.utils.is_callable\n\n:hidden:`is_str`\n~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.is_str\n\n:hidden:`is_placeholder`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.is_placeholder\n\n:hidden:`maybe_hparams_to_dict`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.maybe_hparams_to_dict\n\n\nShape\n=====\n\n:hidden:`mask_sequences`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.mask_sequences\n\n:hidden:`transpose_batch_time`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.transpose_batch_time\n\n:hidden:`get_batch_size`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.get_batch_size\n\n:hidden:`get_rank`\n~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.get_rank\n\n:hidden:`shape_list`\n~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.shape_list\n\n:hidden:`pad_and_concat`\n~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.pad_and_concat\n\n:hidden:`flatten`\n~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.flatten\n\nDictionary\n===========\n\n:hidden:`dict_patch`\n~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.dict_patch\n\n:hidden:`dict_lookup`\n~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.dict_lookup\n\n:hidden:`dict_fetch`\n~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.dict_fetch\n\n:hidden:`dict_pop`\n~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.dict_pop\n\n:hidden:`flatten_dict`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.flatten_dict\n\nString\n=======\n\n:hidden:`map_ids_to_strs`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.map_ids_to_strs\n\n:hidden:`strip_token`\n~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.strip_token\n\n:hidden:`strip_eos`\n~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.strip_eos\n\n:hidden:`strip_special_tokens`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. 
autofunction:: texar.utils.strip_special_tokens\n\n:hidden:`str_join`\n~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.str_join\n\n:hidden:`default_str`\n~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.default_str\n\n:hidden:`uniquify_str`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.uniquify_str\n\n\nMeta\n====\n\n:hidden:`check_or_get_class`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.check_or_get_class\n\n:hidden:`get_class`\n~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.get_class\n\n:hidden:`check_or_get_instance`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.check_or_get_instance\n\n:hidden:`get_instance`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.get_instance\n\n:hidden:`check_or_get_instance_with_redundant_kwargs`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.check_or_get_instance_with_redundant_kwargs\n\n:hidden:`get_instance_with_redundant_kwargs`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.get_instance_with_redundant_kwargs\n\n:hidden:`get_function`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.get_function\n\n:hidden:`call_function_with_redundant_kwargs`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.call_function_with_redundant_kwargs\n\n:hidden:`get_args`\n~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.get_args\n\n:hidden:`get_default_arg_values`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.get_default_arg_values\n\n:hidden:`get_instance_kwargs`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.get_instance_kwargs\n\n\nMode\n====\n\n:hidden:`switch_dropout`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.switch_dropout\n\n:hidden:`maybe_global_mode`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. 
autofunction:: texar.utils.maybe_global_mode\n\n:hidden:`is_train_mode`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.is_train_mode\n\n:hidden:`is_eval_mode`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.is_eval_mode\n\n:hidden:`is_predict_mode`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.is_predict_mode\n\n:hidden:`is_train_mode_py`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.is_train_mode_py\n\n:hidden:`is_eval_mode_py`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.is_eval_mode_py\n\n:hidden:`is_predict_mode_py`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.is_predict_mode_py\n\nMisc\n====\n\n:hidden:`ceildiv`\n~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.ceildiv\n\n:hidden:`straight_through`\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n.. autofunction:: texar.utils.straight_through\n\n\nAverageRecorder\n==========================\n.. autoclass:: texar.utils.AverageRecorder\n    :members:\n"
  },
  {
    "path": "texar_repo/docs/conf.py",
    "content": "# -*- coding: utf-8 -*-\n#\n# texar documentation build configuration file, created by\n# sphinx-quickstart on Mon Sep  4 21:15:05 2017.\n#\n# This file is execfile()d with the current directory set to its\n# containing dir.\n#\n# Note that not all possible configuration values are present in this\n# autogenerated file.\n#\n# All configuration values have a default; values that are commented out\n# serve to show the default.\n\nimport sys\nimport os\nfrom recommonmark.parser import CommonMarkParser\n#from unittest.mock import MagicMock\n\n# If extensions (or modules to document with autodoc) are in another directory,\n# add these directories to sys.path here. If the directory is relative to the\n# documentation root, use os.path.abspath to make it absolute, like shown here.\nsys.path.insert(0, os.path.abspath('..'))\n\n# -- General configuration ------------------------------------------------\n\n# If your documentation needs a minimal Sphinx version, state it here.\n#needs_sphinx = '1.0'\n\n# Add any Sphinx extension module names here, as strings. 
They can be\n# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom\n# ones.\nextensions = [\n    'sphinx.ext.autodoc',\n    'sphinx.ext.doctest',\n    'sphinx.ext.mathjax',\n    'sphinx.ext.viewcode',\n    'sphinx.ext.intersphinx',\n    'sphinx.ext.extlinks',\n    'sphinxcontrib.napoleon',\n]\n\n# Add any paths that contain templates here, relative to this directory.\ntemplates_path = ['_templates']\n\n# The suffix(es) of source filenames.\n# You can specify multiple suffix as a list of string:\nsource_parsers = {\n    '.md': CommonMarkParser,\n}\nsource_suffix = ['.rst', '.md']\n#source_suffix = '.rst'\n\n# The encoding of source files.\n#source_encoding = 'utf-8-sig'\n\n# The master toctree document.\nmaster_doc = 'index'\n\n# General information about the project.\nproject = u'Texar'\ncopyright = u'2018, Texar'\nauthor = u'Texar'\n\n# The version info for the project you're documenting, acts as replacement for\n# |version| and |release|, also used in various other places throughout the\n# built documents.\n#\n# The short X.Y version.\nversion = u'v0.1'\n# The full version, including alpha/beta/rc tags.\nrelease = u'v0.1.0'\n\n# The language for content autogenerated by Sphinx. 
Refer to documentation\n# for a list of supported languages.\n#\n# This is also used if you do content translation via gettext catalogs.\n# Usually you set \"language\" from the command line for these cases.\nlanguage = None\n\n# There are two options for replacing |today|: either, you set today to some\n# non-false value, then it is used:\n#today = ''\n# Else, today_fmt is used as the format for a strftime call.\n#today_fmt = '%B %d, %Y'\n\n# List of patterns, relative to source directory, that match files and\n# directories to ignore when looking for source files.\n# This patterns also effect to html_static_path and html_extra_path\nexclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']\n\n# The reST default role (used for this markup: `text`) to use for all\n# documents.\n#default_role = None\n\n# If true, '()' will be appended to :func: etc. cross-reference text.\n#add_function_parentheses = True\n\n# If true, the current module name will be prepended to all description\n# unit titles (such as .. function::).\n#add_module_names = True\n\n# If true, sectionauthor and moduleauthor directives will be shown in the\n# output. They are ignored by default.\n#show_authors = False\n\n# The name of the Pygments (syntax highlighting) style to use.\npygments_style = 'sphinx'\n\n# A list of ignored prefixes for module index sorting.\n#modindex_common_prefix = []\n\n# If true, keep warnings as \"system message\" paragraphs in the built documents.\n#keep_warnings = False\n\n# If true, `todo` and `todoList` produce output, else they produce nothing.\ntodo_include_todos = False\n\n\n# -- Options for HTML output ----------------------------------------------\n\n# The theme to use for HTML and HTML Help pages.  
See the documentation for\n# a list of builtin themes.\n# html_theme = 'alabaster'\n\nimport sphinx_rtd_theme\nhtml_theme = \"sphinx_rtd_theme\"\nhtml_theme_path = [sphinx_rtd_theme.get_html_theme_path()]\n\n\n# Theme options are theme-specific and customize the look and feel of a theme\n# further.  For a list of options available for each theme, see the\n# documentation.\n#html_theme_options = {}\nhtml_theme_options = {\n    'collapse_navigation': False,\n    'display_version': True,\n    'logo_only': True,\n}\n\n# Add any paths that contain custom themes here, relative to this directory.\n#html_theme_path = []\n\n# The name for this set of Sphinx documents.\n# \"<project> v<release> documentation\" by default.\nhtml_title = u'Texar v0.1'\n\n# A shorter title for the navigation bar.  Default is the same as html_title.\n#html_short_title = None\n\n# The name of an image file (relative to this directory) to place at the top\n# of the sidebar.\nhtml_logo = '_static/img/logo_h.png'\n\n# The name of an image file (relative to this directory) to use as a favicon of\n# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32\n# pixels large.\n#html_favicon = None\n\n# Add any paths that contain custom static files (such as style sheets) here,\n# relative to this directory. They are copied after the builtin static files,\n# so a file named \"default.css\" will overwrite the builtin \"default.css\".\nhtml_static_path = ['_static']\n\nhtml_context = {\n    'css_files': [\n        'https://fonts.googleapis.com/css?family=Lato',\n        '_static/css/custom_theme.css'\n    ],\n}\n\n# Add any extra paths that contain custom files (such as robots.txt or\n# .htaccess) here, relative to this directory. 
These files are copied\n# directly to the root of the documentation.\n#html_extra_path = []\n\n# If not None, a 'Last updated on:' timestamp is inserted at every page\n# bottom, using the given strftime format.\n# The empty string is equivalent to '%b %d, %Y'.\n#html_last_updated_fmt = None\n\n# If true, SmartyPants will be used to convert quotes and dashes to\n# typographically correct entities.\n#html_use_smartypants = True\n\n# Custom sidebar templates, maps document names to template names.\n#html_sidebars = {}\n\n# Additional templates that should be rendered to pages, maps page names to\n# template names.\n#html_additional_pages = {}\n\n# If false, no module index is generated.\n#html_domain_indices = True\n\n# If false, no index is generated.\n#html_use_index = True\n\n# If true, the index is split into individual pages for each letter.\n#html_split_index = False\n\n# If true, links to the reST sources are added to the pages.\n#html_show_sourcelink = True\n\n# If true, \"Created using Sphinx\" is shown in the HTML footer. Default is True.\n#html_show_sphinx = True\n\n# If true, \"(C) Copyright ...\" is shown in the HTML footer. Default is True.\n#html_show_copyright = True\n\n# If true, an OpenSearch description file will be output, and all pages will\n# contain a <link> tag referring to it.  The value of this option must be the\n# base URL from which the finished HTML is served.\n#html_use_opensearch = ''\n\n# This is the file name suffix for HTML files (e.g. 
\".xhtml\").\n#html_file_suffix = None\n\n# Language to be used for generating the HTML full-text search index.\n# Sphinx supports the following languages:\n#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'\n#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'\n#html_search_language = 'en'\n\n# A dictionary with options for the search language support, empty by default.\n# 'ja' uses this config value.\n# 'zh' user can custom change `jieba` dictionary path.\n#html_search_options = {'type': 'default'}\n\n# The name of a javascript file (relative to the configuration directory) that\n# implements a search results scorer. If empty, the default will be used.\n#html_search_scorer = 'scorer.js'\n\n# Output file base name for HTML help builder.\nhtmlhelp_basename = 'texardoc'\n\n# -- Options for LaTeX output ---------------------------------------------\n\nlatex_elements = {\n# The paper size ('letterpaper' or 'a4paper').\n#'papersize': 'letterpaper',\n\n# The font size ('10pt', '11pt' or '12pt').\n#'pointsize': '10pt',\n\n# Additional stuff for the LaTeX preamble.\n#'preamble': '',\n\n# Latex figure (float) alignment\n#'figure_align': 'htbp',\n}\n\n# Grouping the document tree into LaTeX files. 
List of tuples\n# (source start file, target name, title,\n#  author, documentclass [howto, manual, or own class]).\nlatex_documents = [\n    (master_doc, 'texar.tex', u'Texar Documentation',\n     u'Texar', 'manual'),\n]\n\n# The name of an image file (relative to this directory) to place at the top of\n# the title page.\n#latex_logo = None\n\n# For \"manual\" documents, if this is true, then toplevel headings are parts,\n# not chapters.\n#latex_use_parts = False\n\n# If true, show page references after internal links.\n#latex_show_pagerefs = False\n\n# If true, show URL addresses after external links.\n#latex_show_urls = False\n\n# Documents to append as an appendix to all manuals.\n#latex_appendices = []\n\n# If false, no module index is generated.\n#latex_domain_indices = True\n\n\n# -- Options for manual page output ---------------------------------------\n\n# One entry per manual page. List of tuples\n# (source start file, name, description, authors, manual section).\nman_pages = [\n    (master_doc, 'texar', u'Texar Documentation',\n     [author], 1)\n]\n\n# If true, show URL addresses after external links.\n#man_show_urls = False\n\n\n# -- Options for Texinfo output -------------------------------------------\n\n# Grouping the document tree into Texinfo files. 
List of tuples\n# (source start file, target name, title, author,\n#  dir menu entry, description, category)\ntexinfo_documents = [\n    (master_doc, 'texar', u'Texar Documentation',\n     author, 'Texar', 'One line description of project.',\n     'Miscellaneous'),\n]\n\n# Documents to append as an appendix to all manuals.\n#texinfo_appendices = []\n\n# If false, no module index is generated.\n#texinfo_domain_indices = True\n\n# How to display URL addresses: 'footnote', 'no', or 'inline'.\n#texinfo_show_urls = 'footnote'\n\n# If true, do not generate a @detailmenu in the \"Top\" node's menu.\n#texinfo_no_detailmenu = False\n\n# Example configuration for intersphinx: refer to the Python standard library.\nintersphinx_mapping = {\n    'python': ('https://docs.python.org/2.7/', None),\n    'numpy': ('https://docs.scipy.org/doc/numpy/', None),\n}\n\nextlinks = {'tf_main': (\n                'https://www.tensorflow.org/api_docs/python/tf/%s',\n                None),\n            'tf_r0.12': (\n                'https://www.tensorflow.org/versions/r0.12/api_docs/python/%s',\n                None),\n            'tf_hmpg': (\n                'https://www.tensorflow.org/%s',\n                None),\n            'gym': (\n                'https://gym.openai.com/docs/%s',\n                None),\n            }\n\n##### Customize ######\n\nautodoc_member_order = 'bysource'\n\n# Addresses import errors. Refer to:\n# https://docs.readthedocs.io/en/latest/faq.html#i-get-import-errors-on-libraries-that-depend-on-c-modules\n#class Mock(MagicMock):\n#    @classmethod\n#    def __getattr__(cls, name):\n#        return MagicMock()\n#MOCK_MODULES = ['gym']\n#sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)\n"
  },
  {
    "path": "texar_repo/docs/examples.md",
    "content": "# Examples #\n\nRich examples are included to demonstrate the use of Texar. The implementations of cutting-edge models/algorithms also provide references for reproducibility and comparisons. \n\nMore examples are continuously added...\n\n## Examples by Models/Algorithms ##\n\n### RNN / Seq2seq ###\n\n* [language_model_ptb](https://github.com/asyml/texar/tree/master/examples/language_model_ptb): Basic RNN language model\n* [seq2seq_attn](https://github.com/asyml/texar/tree/master/examples/seq2seq_attn): Attentional seq2seq\n* [seq2seq_configs](https://github.com/asyml/texar/tree/master/examples/seq2seq_configs): Seq2seq implemented with Texar model template.\n* [seq2seq_rl](https://github.com/asyml/texar/tree/master/examples/seq2seq_rl): Attentional seq2seq trained with policy gradient.\n* [hierarchical_dialog](https://github.com/asyml/texar/tree/master/examples/hierarchical_dialog): Hierarchical recurrent encoder-decoder model for conversation response generation.\n* [torchtext](https://github.com/asyml/texar/tree/master/examples/torchtext): Use of torchtext data loader\n\n### Transformer (Self-attention) ###\n\n* [transformer](https://github.com/asyml/texar/tree/master/examples/transformer): Transformer for machine translation\n* [vae_text](https://github.com/asyml/texar/tree/master/examples/vae_text): VAE with a transformer decoder for improved language modeling \n\n### Variational Autoencoder (VAE) ###\n\n* [vae_text](https://github.com/asyml/texar/tree/master/examples/vae_text): VAE language model\n\n### GANs / Discriminiator-supervision ###\n\n* [seqGAN](https://github.com/asyml/texar/tree/master/examples/seqgan): GANs for text generation\n* [text_style_transfer](https://github.com/asyml/texar/tree/master/examples/text_style_transfer): Discriminator supervision for controlled text generation\n\n### Reinforcement Learning ###\n\n* [seq2seq_rl](https://github.com/asyml/texar/tree/master/examples/seq2seq_rl): Attentional seq2seq trained with 
policy gradient.\n* [seqGAN](https://github.com/asyml/texar/tree/master/examples/seqgan): Policy gradient for sequence generation\n* [rl_gym](https://github.com/asyml/texar/tree/master/examples/rl_gym): Various RL algoritms for games on OpenAI Gym\n\n### Memory Network ###\n\n* [memory_network_lm](https://github.com/asyml/texar/tree/master/examples/memory_network_lm): End-to-end memory network for language modeling\n\n### Classifier / Predictions ##  \n\n* [sentence_classifier](https://github.com/asyml/texar/tree/master/examples/sentence_classifier): Basic CNN-based sentence classifier\n* [sequence_tagging](https://github.com/asyml/texar/tree/master/examples/sequence_tagging): BiLSTM-CNN model for Named Entity Recognition (NER)\n\n---\n\n## Examples by Tasks\n\n### Language Modeling ###\n\n* [language_model_ptb](https://github.com/asyml/texar/tree/master/examples/language_model_ptb): Basic RNN language model\n* [vae_text](https://github.com/asyml/texar/tree/master/examples/vae_text): VAE language model\n* [seqGAN](https://github.com/asyml/texar/tree/master/examples/seqgan): GAN + policy gradient\n* [memory_network_lm](https://github.com/asyml/texar/tree/master/examples/memory_network_lm): End-to-end memory network for language modeling\n\n### Machine Translation ###\n\n* [seq2seq_attn](https://github.com/asyml/texar/tree/master/examples/seq2seq_attn): Attentional seq2seq\n* [seq2seq_configs](https://github.com/asyml/texar/tree/master/examples/seq2seq_configs): Seq2seq implemented with Texar model template.\n* [seq2seq_rl](https://github.com/asyml/texar/tree/master/examples/seq2seq_rl): Attentional seq2seq trained with policy gradient.\n* [transformer](https://github.com/asyml/texar/tree/master/examples/transformer): Transformer for machine translation\n\n### Dialog ###\n\n* [hierarchical_dialog](https://github.com/asyml/texar/tree/master/examples/hierarchical_dialog): Hierarchical recurrent encoder-decoder model for conversation response generation.\n\n### Text 
Style Transfer ###\n\n* [text_style_transfer](https://github.com/asyml/texar/tree/master/examples/text_style_transfer): Discriminator supervision for controlled text generation\n\n### Classification ###\n\n* [sentence_classifier](https://github.com/asyml/texar/tree/master/examples/sentence_classifier): Basic CNN-based sentence classifier\n\n### Sequence Tagging ###\n\n* [sequence_tagging](https://github.com/asyml/texar/tree/master/examples/sequence_tagging): BiLSTM-CNN model for Named Entity Recognition (NER)\n\n### Games ###\n\n* [rl_gym](https://github.com/asyml/texar/tree/master/examples/rl_gym): Various RL algorithms for games on OpenAI Gym\n"
  },
  {
    "path": "texar_repo/docs/get_started.md",
    "content": "# Overview #\n\n**Texar** is an open-source toolkit based on Tensorflow, aiming to support a broad set of machine learning especially **text generation tasks**, such as machine translation, dialog, summarization, content manipulation, language modeling, and so on. Texar is designed for both researchers and practitioners for fast prototyping and experimentation.\n \nWith the design goals of **modularity, versatility, and extensibility** in mind, Texar extracts the common patterns underlying the diverse tasks and methodologies, creates a library of highly reusable modules and functionalities, and facilitates **arbitrary model architectures and algorithmic paradigms**, e.g., \n   * encoder(s) to decoder(s), sequential- and self-attentions, memory, hierarchical models, classifiers... \n   * maximum likelihood learning, reinforcement learning, adversarial learning, probabilistic modeling, ... \n\nWith Texar, cutting-edge complex models can be easily constructed, freely enriched with best modeling/training practices, readily fitted into standard training/evaluation pipelines, and fastly experimented and evolved by, e.g., plugging-in and swapping-out different modules.\n\n<div align=\"center\">\n   <img src=\"https://zhitinghu.github.io/texar_web/images/texar_stack.png\"><br><br>\n</div> \n\n### Key Features\n* **Versatility**. Texar contains a wide range of modules and functionalities for composing arbitrary model architectures and implementing various learning algorithms, as well as for data processing, evaluation, prediction, etc.\n* **Modularity**. Texar decomposes diverse complex machine learning models/algorithms into a set of highly-reusable modules. In particular, model **architecture, losses, and learning processes** are fully decomposed.  \nUsers can construct their own models at a high conceptual level just like assembling building blocks. It is convenient to plug in or swap out modules, and configure rich options of each module. 
For example, switching between maximum likelihood learning and reinforcement learning involves only changing several lines of code.\n* **Extensibility**. It is straightforward to integrate any user-customized, external modules. Also, Texar is fully compatible with the native Tensorflow interfaces and can take advantage of the rich Tensorflow features, and resources from the vibrant open-source community.\n* Interfaces with different functionality levels. Users can customize a model through 1) simple **Python/YAML configuration files** of provided model templates/examples; 2) programming with **Python Library APIs** for maximal customizability.\n* Easy-to-use APIs: 1) Convenient automatic variable re-use---no worry about the complicated TF variable scopes; 2) Pytorch-like callable modules; 3) Rich configuration options for each module, all with default values; ...\n* Well-structured high-quality code of uniform design patterns and consistent styles. \n* Clean, detailed [documentation](https://texar.readthedocs.io) and rich [examples](https://github.com/asyml/texar/tree/master/examples).\n\n### Library API Example\nBuilds a (self-)attentional sequence encoder-decoder model, with different learning algorithms:\n```python\nimport texar as tx\n\n# Data \ndata = tx.data.PairedTextData(hparams=hparams_data) # Hyperparameter configs in `hparams` \niterator = tx.data.DataIterator(data)\nbatch = iterator.get_next() # A data mini-batch\n\n# Model architecture\nembedder = tx.modules.WordEmbedder(data.target_vocab.size, hparams=hparams_emb)\nencoder = tx.modules.TransformerEncoder(hparams=hparams_encoder)\noutputs_enc = encoder(inputs=embedder(batch['source_text_ids']),\n                      sequence_length=batch['source_length'])\n                      \ndecoder = tx.modules.AttentionRNNDecoder(memory=outputs_enc, \n                                         memory_sequence_length=batch['source_length'],\n                                         
hparams=hparams_decoder)\noutputs, _, _ = decoder(inputs=embedder(batch['target_text_ids']),\n                        sequence_length=batch['target_length']-1)\n                        \n# Loss for maximum likelihood learning\nloss = tx.losses.sequence_sparse_softmax_cross_entropy(\n    labels=batch['target_text_ids'][:, 1:],\n    logits=outputs.logits,\n    sequence_length=batch['target_length']-1) # Automatic masks\n\n# Beam search decoding\noutputs_bs, _, _ = tx.modules.beam_search_decode(\n    decoder,\n    embedding=embedder,\n    start_tokens=[data.target_vocab.bos_token_id]*num_samples,\n    end_token=data.target_vocab.eos_token_id)\n```\n```python\n# Policy gradient agent for RL learning\nagent = tx.agents.SeqPGAgent(samples=outputs.sample_id,\n                             logits=outputs.logits,\n                             sequence_length=batch['target_length']-1,\n                             hparams=config_model.agent)\n```\nMany more examples are available [here](https://github.com/asyml/texar/tree/master/examples)\n  \n### Installation\n```\ngit clone https://github.com/asyml/texar.git\ncd texar\npip install -e .\n```\n\n### Getting Started\n* [Examples](https://github.com/asyml/texar/tree/master/examples)\n* [Documentations](https://texar.readthedocs.io)\n* [GitHub](https://github.com/asyml/texar)\n\n### Reference\nIf you use Texar, please cite the [report](.) with the following BibTex entry:\n```\nTexar: A Modularized, Versatile, and Extensible Toolkit for Text Generation\nZhiting Hu, Haoran Shi, Zichao Yang, Bowen Tan, Tiancheng Zhao, Junxian He, Wentao Wang, Xingjiang Yu, Lianhui Qin, Di Wang, Xuezhe Ma, Hector Liu, Xiaodan Liang, Wanrong Zhu, Devendra Singh Sachan, Eric P. 
Xing\n2018\n\n@article{hu2018texar, \n  title={Texar: A Modularized, Versatile, and Extensible Toolkit for Text Generation},\n  author={Hu, Zhiting and Shi, Haoran and Yang, Zichao and Tan, Bowen and Zhao, Tiancheng and He, Junxian and Wang, Wentao and Yu, Xingjiang and Qin, Lianhui and Wang, Di and Ma, Xuezhe and Liu, Hector and Liang, Xiaodan and Zhu, Wanrong and Sachan, Devendra Singh and Xing, Eric},\n  year={2018}\n}\n```\n\n### License\n[Apache License 2.0](https://github.com/asyml/texar/blob/master/LICENSE)\n"
  },
  {
    "path": "texar_repo/docs/index.rst",
    "content": ".. texar documentation master file, created by\n   sphinx-quickstart on Mon Sep  4 21:15:05 2017.\n   You can adapt this file completely to your liking, but it should at least\n   contain the root `toctree` directive.\n\nWelcome to Texar's documentation!\n*********************************\n\nTexar is a modularized, versatile, and extensible toolkit for text generation tasks and beyond. \n\n\n.. toctree::\n   :maxdepth: 1\n\n   get_started.md\n\n.. toctree::\n   :maxdepth: 2\n\n   examples.md\n\nAPI\n====\n\n.. toctree::\n   :maxdepth: 2\n\n   code/hyperparams.rst\n   code/data.rst\n   code/core.rst\n   code/modules.rst\n   code/agents.rst\n   code/losses.rst\n   code/evals.rst\n   code/models.rst\n   code/run.rst\n   code/context.rst\n   code/utils.rst\n"
  },
  {
    "path": "texar_repo/docs/make.bat",
    "content": "@ECHO OFF\n\nREM Command file for Sphinx documentation\n\nif \"%SPHINXBUILD%\" == \"\" (\n\tset SPHINXBUILD=sphinx-build\n)\nset BUILDDIR=_build\nset ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .\nset I18NSPHINXOPTS=%SPHINXOPTS% .\nif NOT \"%PAPER%\" == \"\" (\n\tset ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%\n\tset I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%\n)\n\nif \"%1\" == \"\" goto help\n\nif \"%1\" == \"help\" (\n\t:help\n\techo.Please use `make ^<target^>` where ^<target^> is one of\n\techo.  html       to make standalone HTML files\n\techo.  dirhtml    to make HTML files named index.html in directories\n\techo.  singlehtml to make a single large HTML file\n\techo.  pickle     to make pickle files\n\techo.  json       to make JSON files\n\techo.  htmlhelp   to make HTML files and a HTML help project\n\techo.  qthelp     to make HTML files and a qthelp project\n\techo.  devhelp    to make HTML files and a Devhelp project\n\techo.  epub       to make an epub\n\techo.  epub3      to make an epub3\n\techo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter\n\techo.  text       to make text files\n\techo.  man        to make manual pages\n\techo.  texinfo    to make Texinfo files\n\techo.  gettext    to make PO message catalogs\n\techo.  changes    to make an overview over all changed/added/deprecated items\n\techo.  xml        to make Docutils-native XML files\n\techo.  pseudoxml  to make pseudoxml-XML files for display purposes\n\techo.  linkcheck  to check all external links for integrity\n\techo.  doctest    to run all doctests embedded in the documentation if enabled\n\techo.  coverage   to run coverage check of the documentation if enabled\n\techo.  
dummy      to check syntax errors of document sources\n\tgoto end\n)\n\nif \"%1\" == \"clean\" (\n\tfor /d %%i in (%BUILDDIR%\\*) do rmdir /q /s %%i\n\tdel /q /s %BUILDDIR%\\*\n\tgoto end\n)\n\n\nREM Check if sphinx-build is available and fallback to Python version if any\n%SPHINXBUILD% 1>NUL 2>NUL\nif errorlevel 9009 goto sphinx_python\ngoto sphinx_ok\n\n:sphinx_python\n\nset SPHINXBUILD=python -m sphinx.__init__\n%SPHINXBUILD% 2> nul\nif errorlevel 9009 (\n\techo.\n\techo.The 'sphinx-build' command was not found. Make sure you have Sphinx\n\techo.installed, then set the SPHINXBUILD environment variable to point\n\techo.to the full path of the 'sphinx-build' executable. Alternatively you\n\techo.may add the Sphinx directory to PATH.\n\techo.\n\techo.If you don't have Sphinx installed, grab it from\n\techo.http://sphinx-doc.org/\n\texit /b 1\n)\n\n:sphinx_ok\n\n\nif \"%1\" == \"html\" (\n\t%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished. The HTML pages are in %BUILDDIR%/html.\n\tgoto end\n)\n\nif \"%1\" == \"dirhtml\" (\n\t%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.\n\tgoto end\n)\n\nif \"%1\" == \"singlehtml\" (\n\t%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished. 
The HTML pages are in %BUILDDIR%/singlehtml.\n\tgoto end\n)\n\nif \"%1\" == \"pickle\" (\n\t%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished; now you can process the pickle files.\n\tgoto end\n)\n\nif \"%1\" == \"json\" (\n\t%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished; now you can process the JSON files.\n\tgoto end\n)\n\nif \"%1\" == \"htmlhelp\" (\n\t%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished; now you can run HTML Help Workshop with the ^\n.hhp project file in %BUILDDIR%/htmlhelp.\n\tgoto end\n)\n\nif \"%1\" == \"qthelp\" (\n\t%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished; now you can run \"qcollectiongenerator\" with the ^\n.qhcp project file in %BUILDDIR%/qthelp, like this:\n\techo.^> qcollectiongenerator %BUILDDIR%\\qthelp\\texar.qhcp\n\techo.To view the help file:\n\techo.^> assistant -collectionFile %BUILDDIR%\\qthelp\\texar.ghc\n\tgoto end\n)\n\nif \"%1\" == \"devhelp\" (\n\t%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished.\n\tgoto end\n)\n\nif \"%1\" == \"epub\" (\n\t%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished. The epub file is in %BUILDDIR%/epub.\n\tgoto end\n)\n\nif \"%1\" == \"epub3\" (\n\t%SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished. 
The epub3 file is in %BUILDDIR%/epub3.\n\tgoto end\n)\n\nif \"%1\" == \"latex\" (\n\t%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished; the LaTeX files are in %BUILDDIR%/latex.\n\tgoto end\n)\n\nif \"%1\" == \"latexpdf\" (\n\t%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex\n\tcd %BUILDDIR%/latex\n\tmake all-pdf\n\tcd %~dp0\n\techo.\n\techo.Build finished; the PDF files are in %BUILDDIR%/latex.\n\tgoto end\n)\n\nif \"%1\" == \"latexpdfja\" (\n\t%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex\n\tcd %BUILDDIR%/latex\n\tmake all-pdf-ja\n\tcd %~dp0\n\techo.\n\techo.Build finished; the PDF files are in %BUILDDIR%/latex.\n\tgoto end\n)\n\nif \"%1\" == \"text\" (\n\t%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished. The text files are in %BUILDDIR%/text.\n\tgoto end\n)\n\nif \"%1\" == \"man\" (\n\t%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished. The manual pages are in %BUILDDIR%/man.\n\tgoto end\n)\n\nif \"%1\" == \"texinfo\" (\n\t%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.\n\tgoto end\n)\n\nif \"%1\" == \"gettext\" (\n\t%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished. 
The message catalogs are in %BUILDDIR%/locale.\n\tgoto end\n)\n\nif \"%1\" == \"changes\" (\n\t%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.The overview file is in %BUILDDIR%/changes.\n\tgoto end\n)\n\nif \"%1\" == \"linkcheck\" (\n\t%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Link check complete; look for any errors in the above output ^\nor in %BUILDDIR%/linkcheck/output.txt.\n\tgoto end\n)\n\nif \"%1\" == \"doctest\" (\n\t%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Testing of doctests in the sources finished, look at the ^\nresults in %BUILDDIR%/doctest/output.txt.\n\tgoto end\n)\n\nif \"%1\" == \"coverage\" (\n\t%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Testing of coverage in the sources finished, look at the ^\nresults in %BUILDDIR%/coverage/python.txt.\n\tgoto end\n)\n\nif \"%1\" == \"xml\" (\n\t%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished. The XML files are in %BUILDDIR%/xml.\n\tgoto end\n)\n\nif \"%1\" == \"pseudoxml\" (\n\t%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.\n\tgoto end\n)\n\nif \"%1\" == \"dummy\" (\n\t%SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy\n\tif errorlevel 1 exit /b 1\n\techo.\n\techo.Build finished. Dummy builder generates no files.\n\tgoto end\n)\n\n:end\n"
  },
  {
    "path": "texar_repo/docs/requirements.txt",
    "content": "sphinx\nsphinx-rtd-theme >= 0.2.4\nsphinxcontrib-napoleon >= 0.6.1\nPygments >= 2.1.1\ntensorflow >= 1.7.0\npyyaml\nfuncsigs\n"
  },
  {
    "path": "texar_repo/docs/tutorials/tutorial.rst",
    "content": "Getting Started\n===============\n\nWrite an awesome tutorial here."
  },
  {
    "path": "texar_repo/examples/README.md",
    "content": "# Examples #\n\nRich examples are included to demonstrate the use of Texar. The implementations of cutting-edge models/algorithms also provide references for reproducibility and comparisons. \n\nMore examples are continuously added...\n\n## Examples by Models/Algorithms ##\n\n### RNN / Seq2seq ###\n\n* [language_model_ptb](./language_model_ptb): Basic RNN language model\n* [distributed_gpu](./distributed_gpu): Basic RNN language model with distributed training\n* [seq2seq_attn](./seq2seq_attn): Attentional seq2seq\n* [seq2seq_configs](./seq2seq_configs): Seq2seq implemented with Texar model template\n* [seq2seq_rl](./seq2seq_rl): Attentional seq2seq trained with policy gradient\n* [seq2seq_exposure_bias](./seq2seq_exposure_bias): Various algorithms tackling exposure bias in sequence generation\n* [hierarchical_dialog](./hierarchical_dialog): Hierarchical recurrent encoder-decoder model for conversation response generation\n* [torchtext](./torchtext): Use of torchtext data loader\n\n### Transformer (Self-attention) ###\n\n* [transformer](./transformer): Transformer for machine translation\n* [bert](./bert): Pre-trained BERT model for text representation\n* [vae_text](./vae_text): VAE with a transformer decoder for improved language modeling \n\n### Variational Autoencoder (VAE) ###\n\n* [vae_text](./vae_text): VAE language model\n\n### GANs / Discriminiator-supervision ###\n\n* [seqGAN](./seqgan): GANs for text generation\n* [text_style_transfer](./text_style_transfer): Discriminator supervision for controlled text generation\n\n### Reinforcement Learning ###\n\n* [seq2seq_rl](./seq2seq_rl): Attentional seq2seq trained with policy gradient.\n* [seqGAN](./seqgan): Policy gradient for sequence generation\n* [rl_gym](./rl_gym): Various RL algoritms for games on OpenAI Gym\n\n### Memory Network ###\n\n* [memory_network_lm](./memory_network_lm): End-to-end memory network for language modeling\n\n### Classifier / Sequence Prediction ###  \n\n* 
[bert](./bert): Pre-trained BERT model for text representation\n* [sentence_classifier](./sentence_classifier): Basic CNN-based sentence classifier\n* [sequence_tagging](./sequence_tagging): BiLSTM-CNN model for Named Entity Recognition (NER)\n\n### Reward Augmented Maximum Likelihood (RAML) ###\n\n* [seq2seq_exposure_bias](./seq2seq_exposure_bias): RAML and other learning algorithms for sequence generation \n\n---\n\n## Examples by Tasks\n\n### Language Modeling ###\n\n* [language_model_ptb](./language_model_ptb): Basic RNN language model\n* [vae_text](./vae_text): VAE language model\n* [seqGAN](./seqgan): GAN + policy gradient\n* [memory_network_lm](./memory_network_lm): End-to-end memory network for language modeling\n\n### Machine Translation ###\n\n* [seq2seq_attn](./seq2seq_attn): Attentional seq2seq\n* [seq2seq_configs](./seq2seq_configs): Seq2seq implemented with Texar model template.\n* [seq2seq_rl](./seq2seq_rl): Attentional seq2seq trained with policy gradient.\n* [seq2seq_exposure_bias](./seq2seq_exposure_bias): Various algorithms tackling exposure bias in sequence generation (MT and summarization as examples).\n* [transformer](./transformer): Transformer for machine translation\n\n### Dialog ###\n\n* [hierarchical_dialog](./hierarchical_dialog): Hierarchical recurrent encoder-decoder model for conversation response generation.\n\n### Text Summarization ###\n\n* [seq2seq_exposure_bias](./seq2seq_exposure_bias): Various algorithms tackling exposure bias in sequence generation (MT and summarization as examples).\n\n### Text Style Transfer ###\n\n* [text_style_transfer](./text_style_transfer): Discriminator supervision for controlled text generation\n\n### Classification ###\n\n* [bert](./bert): Pre-trained BERT model for text representation\n* [sentence_classifier](./sentence_classifier): Basic CNN-based sentence classifier\n\n### Sequence Tagging ###\n\n* [sequence_tagging](./sequence_tagging): BiLSTM-CNN model for Named Entity Recognition (NER)\n\n### 
Games ###\n\n* [rl_gym](./rl_gym): Various RL algorithms for games on OpenAI Gym\n\n---\n\n## MISC ##\n\n### Distributed training ###\n\n* [distributed_gpu](./distributed_gpu): Basic example of distributed training.\n* [bert](./bert): Distributed training of BERT.\n\n"
  },
  {
    "path": "texar_repo/examples/bert/README.md",
    "content": "# BERT: Pre-trained models and downstream applications\n\nThis is a Texar implementation of Google's BERT model, which allows loading pre-trained model parameters downloaded from the [official releases](https://github.com/google-research/bert) and build/fine-tune arbitrary downstream applications with **distributed training** (This example showcases BERT for sentence classification).\n\nWith Texar, building the BERT model is as simple as creating a [`TransformerEncoder`](https://texar.readthedocs.io/en/latest/code/modules.html#transformerencoder) instance. We can initialize the parameters of the TransformerEncoder using a pre-trained BERT checkpoint by calling `init_bert_checkpoint(path_to_bert_checkpoint)`. \n\nIn sum, this example showcases:\n\n* Use of pre-trained Google BERT models in Texar\n* Building and fine-tuning on downstream tasks\n* Distributed training of the models\n\n## Quick Start\n\n### Download Dataset\n\nWe explain the use of the example code based on the Microsoft Research Paraphrase Corpus (MRPC) corpus for sentence classification. \n\nDownload the data with the following cmd\n```\npython data/download_glue_data.py --tasks=MRPC\n```\nBy default, it will download the MRPC dataset into the `data` directory. FYI, the MRPC dataset is part of the [GLUE](https://gluebenchmark.com/tasks) dataset collection.\n\n### Download BERT Pre-train Model\n\n```\nsh bert_pretrained_models/download_model.sh\n```\nBy default, it will download a pretrained model (BERT-Base Uncased: 12-layer, 768-hidden, 12-heads, 110M parameters) named `uncased_L-12_H-768_A-12` to `bert_pretrained_models/`.\n\nUnder `bert_pretrained_models/uncased_L-12_H-768_A-12`, you can find 5 files, where\n- `bert_config.json` is the model configuration of the BERT model. 
For the particular model we just downloaded, it is an uncased-vocabulary, 12-layer, 768-hidden, 12-heads Transformer model.\n\n### Train and Evaluate\n\nFor **single-GPU** training (and evaluation), run the following cmd. The training updates the classification layer and fine-tunes the pre-trained BERT parameters.\n```\n    python bert_classifier_main.py --do_train --do_eval\n    [--task=mrpc]\n    [--config_bert_pretrain=uncased_L-12_H-768_A-12]\n    [--config_downstream=config_classifier]\n    [--config_data=config_data_mrpc]\n    [--output_dir=output] \n```\nHere:\n\n- `task`: Specifies which dataset to experiment on.\n- `config_bert_pretrain`: Specifies the architecture of pre-trained BERT model to use.\n- `config_downstream`: Configuration of the downstream part. In this example, [`config_classifier.py`](https://github.com/asyml/texar/blob/master/examples/bert/bert_classifier_main.py) configures the classification layer and the optimization method.\n- `config_data`: The data configuration.\n- `output_dir`: The output path where checkpoints and summaries for tensorboard visualization are saved.\n\nFor **Multi-GPU training** on one or multiple machines, you may first install the prerequisite OpenMPI and Horovod packages, as detailed in the [distributed_gpu](https://github.com/asyml/texar/tree/master/examples/distributed_gpu) example. \n\nThen run the following cmd for training and evaluation. The cmd trains the model locally with 2 GPUs. 
Evaluation is performed with the single rank-0 GPU.\n```\nmpirun -np 2 \\\n    -H  localhost:2\\\n    -bind-to none -map-by slot \\\n    -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x PATH \\\n    -mca pml ob1 -mca btl tcp,self \\\n    -mca btl_tcp_if_include ens3 \\\n    python bert_classifier_main.py --do_train --do_eval --distributed\n    [--task=mrpc]\n    [--config_bert_pretrain=uncased_L-12_H-768_A-12]\n    [--config_downstream=config_classifier]\n    [--config_data=config_data_mrpc]\n    [--output_dir=output] \n```\nThe key configurations of multi-gpu training:\n\n* `-np`: total number of processes\n* `-H`: IP addresses of different servers and the number of processes used in each server. For example, `-H 192.168.11.22:1,192.168.33.44:1`\n\nPlease refer to [distributed_gpu](https://github.com/asyml/texar/tree/master/examples/distributed_gpu) example for more details of the other multi-gpu configurations.\n\nNote that we also specified the `--distributed` flag for multi-gpu training.\n\n&nbsp;\n\nAfter convergence, the evaluation performance is around the following. Due to certain randomness (e.g., random initialization of the classification layer), the evaluation accuracy is reasonable as long as it's `>0.84`.\n```\nINFO:tensorflow:dev accu: 0.8676470588235294\n```\n\n### Restore and Test\n\n```\npython bert_classifier_main.py --do_test --checkpoint=output/model.ckpt\n```\n\nThe output is by default saved in `output/test_results.tsv`, where each line contains the predicted label for each sample.\n\n\n## Use other datasets/tasks\n\n`bert_classifier_main.py` also supports other datasets/tasks. To do this, specify a different value to the `--task` flag, and use a corresponding data configuration file. 
\n\nFor example, use the following commands to download the SST (Stanford Sentiment Treebank) dataset and run for sentence classification.\n```\npython data/download_glue_data.py --tasks=SST\npython bert_classifier_main.py --do_train --do_eval --task=sst --config_data=config_data_sst\n```\n"
  },
  {
    "path": "texar_repo/examples/bert/bert_classifier_main.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Example of building a sentence classifier based on pre-trained BERT\nmodel.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport importlib\nimport tensorflow as tf\nimport texar as tx\n\nfrom utils import data_utils, model_utils, tokenization\n\n# pylint: disable=invalid-name, too-many-locals, too-many-statements\n\nflags = tf.flags\n\nFLAGS = flags.FLAGS\nflags.DEFINE_string(\n    \"task\", \"mrpc\",\n    \"The task to run experiment on. One of \"\n    \"{'cola', 'mnli', 'mrpc', 'xnli', 'sst'}.\")\nflags.DEFINE_string(\n    \"config_bert_pretrain\", 'uncased_L-12_H-768_A-12',\n    \"The architecture of pre-trained BERT model to use.\")\nflags.DEFINE_string(\n    \"config_format_bert\", \"json\",\n    \"The configuration format. Set to 'json' if the BERT config file is in \"\n    \"the same format of the official BERT config file. 
Set to 'texar' if the \"\n    \"BERT config file is in Texar format.\")\nflags.DEFINE_string(\n    \"config_downstream\", \"config_classifier\",\n    \"Configuration of the downstream part of the model and optmization.\")\nflags.DEFINE_string(\n    \"config_data\", \"config_data_mrpc\",\n    \"The dataset config.\")\nflags.DEFINE_string(\n    \"checkpoint\", None,\n    \"Path to a model checkpoint (including bert modules) to restore from.\")\nflags.DEFINE_string(\n    \"output_dir\", \"output/\",\n    \"The output directory where the model checkpoints will be written.\")\nflags.DEFINE_bool(\n    \"do_lower_case\", True,\n    \"Whether to lower case the input text. Should be True for uncased \"\n    \"models and False for cased models.\")\nflags.DEFINE_bool(\"do_train\", False, \"Whether to run training.\")\nflags.DEFINE_bool(\"do_eval\", False, \"Whether to run eval on the dev set.\")\nflags.DEFINE_bool(\"do_test\", False, \"Whether to run test on the test set.\")\nflags.DEFINE_bool(\"distributed\", False, \"Whether to run in distributed mode.\")\n\nconfig_data = importlib.import_module(FLAGS.config_data)\nconfig_downstream = importlib.import_module(FLAGS.config_downstream)\n\ndef main(_):\n    \"\"\"\n    Builds the model and runs.\n    \"\"\"\n\n    if FLAGS.distributed:\n        import horovod.tensorflow as hvd\n        hvd.init()\n\n    tf.logging.set_verbosity(tf.logging.INFO)\n    tx.utils.maybe_create_dir(FLAGS.output_dir)\n    bert_pretrain_dir = 'bert_pretrained_models/%s' % FLAGS.config_bert_pretrain\n\n    # Loads BERT model configuration\n    if FLAGS.config_format_bert == \"json\":\n        bert_config = model_utils.transform_bert_to_texar_config(\n            os.path.join(bert_pretrain_dir, 'bert_config.json'))\n    elif FLAGS.config_format_bert == 'texar':\n        bert_config = importlib.import_module(\n            'bert_config_lib.config_model_%s' % FLAGS.config_bert_pretrain)\n    else:\n        raise ValueError('Unknown config_format_bert.')\n\n  
  # Loads data\n    processors = {\n        \"cola\": data_utils.ColaProcessor,\n        \"mnli\": data_utils.MnliProcessor,\n        \"mrpc\": data_utils.MrpcProcessor,\n        \"xnli\": data_utils.XnliProcessor,\n        'sst': data_utils.SSTProcessor\n    }\n\n    processor = processors[FLAGS.task.lower()]()\n\n    num_classes = len(processor.get_labels())\n    num_train_data = len(processor.get_train_examples(config_data.data_dir))\n\n    tokenizer = tokenization.FullTokenizer(\n        vocab_file=os.path.join(bert_pretrain_dir, 'vocab.txt'),\n        do_lower_case=FLAGS.do_lower_case)\n\n    train_dataset = data_utils.get_dataset(\n        processor, tokenizer, config_data.data_dir, config_data.max_seq_length,\n        config_data.train_batch_size, mode='train', output_dir=FLAGS.output_dir,\n        is_distributed=FLAGS.distributed)\n\n    eval_dataset = data_utils.get_dataset(\n        processor, tokenizer, config_data.data_dir, config_data.max_seq_length,\n        config_data.eval_batch_size, mode='eval', output_dir=FLAGS.output_dir)\n    test_dataset = data_utils.get_dataset(\n        processor, tokenizer, config_data.data_dir, config_data.max_seq_length,\n        config_data.test_batch_size, mode='test', output_dir=FLAGS.output_dir)\n\n    iterator = tx.data.FeedableDataIterator({\n        'train': train_dataset, 'eval': eval_dataset, 'test': test_dataset})\n    batch = iterator.get_next()\n    input_ids = batch[\"input_ids\"]\n    segment_ids = batch[\"segment_ids\"]\n    batch_size = tf.shape(input_ids)[0]\n    input_length = tf.reduce_sum(1 - tf.to_int32(tf.equal(input_ids, 0)),\n                                 axis=1)\n\n    # Builds BERT\n    with tf.variable_scope('bert'):\n        embedder = tx.modules.WordEmbedder(\n            vocab_size=bert_config.vocab_size,\n            hparams=bert_config.embed)\n        word_embeds = embedder(input_ids)\n\n        # Creates segment embeddings for each type of tokens.\n        segment_embedder = 
tx.modules.WordEmbedder(\n            vocab_size=bert_config.type_vocab_size,\n            hparams=bert_config.segment_embed)\n        segment_embeds = segment_embedder(segment_ids)\n\n        input_embeds = word_embeds + segment_embeds\n\n        # The BERT model (a TransformerEncoder)\n        encoder = tx.modules.TransformerEncoder(hparams=bert_config.encoder)\n        output = encoder(input_embeds, input_length)\n\n        # Builds layers for downstream classification, which is also initialized\n        # with BERT pre-trained checkpoint.\n        with tf.variable_scope(\"pooler\"):\n            # Uses the projection of the 1st-step hidden vector of BERT output\n            # as the representation of the sentence\n            bert_sent_hidden = tf.squeeze(output[:, 0:1, :], axis=1)\n            bert_sent_output = tf.layers.dense(\n                bert_sent_hidden, config_downstream.hidden_dim,\n                activation=tf.tanh)\n            output = tf.layers.dropout(\n                bert_sent_output, rate=0.1, training=tx.global_mode_train())\n\n    # Adds the final classification layer\n    logits = tf.layers.dense(\n        output, num_classes,\n        kernel_initializer=tf.truncated_normal_initializer(stddev=0.02))\n    preds = tf.argmax(logits, axis=-1, output_type=tf.int32)\n    accu = tx.evals.accuracy(batch['label_ids'], preds)\n\n    # Optimization\n\n    loss = tf.losses.sparse_softmax_cross_entropy(\n        labels=batch[\"label_ids\"], logits=logits)\n    global_step = tf.Variable(0, trainable=False)\n\n    # Builds learning rate decay scheduler\n    static_lr = config_downstream.lr['static_lr']\n    num_train_steps = int(num_train_data / config_data.train_batch_size\n                          * config_data.max_train_epoch)\n    num_warmup_steps = int(num_train_steps * config_data.warmup_proportion)\n    lr = model_utils.get_lr(global_step, num_train_steps, # lr is a Tensor\n                            num_warmup_steps, static_lr)\n\n    opt = 
tx.core.get_optimizer(\n        global_step=global_step,\n        learning_rate=lr,\n        hparams=config_downstream.opt\n    )\n\n    if FLAGS.distributed:\n        opt = hvd.DistributedOptimizer(opt)\n\n    train_op = tf.contrib.layers.optimize_loss(\n        loss=loss,\n        global_step=global_step,\n        learning_rate=None,\n        optimizer=opt)\n\n    # Train/eval/test routine\n\n    def _run(sess, mode):\n        fetches = {\n            'accu': accu,\n            'batch_size': batch_size,\n            'step': global_step,\n            'loss': loss,\n            'input_ids': input_ids,\n        }\n\n        if mode == 'train':\n            fetches['train_op'] = train_op\n            while True:\n                try:\n                    feed_dict = {\n                        iterator.handle: iterator.get_handle(sess, 'train'),\n                        tx.global_mode(): tf.estimator.ModeKeys.TRAIN,\n                    }\n                    rets = sess.run(fetches, feed_dict)\n                    if rets['step'] % 50 == 0:\n                        tf.logging.info(\n                            'step:%d loss:%f' % (rets['step'], rets['loss']))\n                    if rets['step'] == num_train_steps:\n                        break\n                except tf.errors.OutOfRangeError:\n                    break\n\n        if mode == 'eval':\n            cum_acc = 0.0\n            nsamples = 0\n            while True:\n                try:\n                    feed_dict = {\n                        iterator.handle: iterator.get_handle(sess, 'eval'),\n                        tx.context.global_mode(): tf.estimator.ModeKeys.EVAL,\n                    }\n                    rets = sess.run(fetches, feed_dict)\n\n                    cum_acc += rets['accu'] * rets['batch_size']\n                    nsamples += rets['batch_size']\n                except tf.errors.OutOfRangeError:\n                    break\n\n            tf.logging.info('dev accu: {} nsamples: 
{}'.format(cum_acc / nsamples, nsamples))\n\n        if mode == 'test':\n            _all_preds = []\n            while True:\n                try:\n                    feed_dict = {\n                        iterator.handle: iterator.get_handle(sess, 'test'),\n                        tx.context.global_mode(): tf.estimator.ModeKeys.PREDICT,\n                    }\n                    _preds = sess.run(preds, feed_dict=feed_dict)\n                    _all_preds.extend(_preds.tolist())\n                except tf.errors.OutOfRangeError:\n                    break\n\n            output_file = os.path.join(FLAGS.output_dir, \"test_results.tsv\")\n            with tf.gfile.GFile(output_file, \"w\") as writer:\n                writer.write('\\n'.join(str(p) for p in _all_preds))\n\n    # Loads pretrained BERT model parameters\n    init_checkpoint = os.path.join(bert_pretrain_dir, 'bert_model.ckpt')\n    model_utils.init_bert_checkpoint(init_checkpoint)\n\n    # broadcast global variables from rank-0 process\n    if FLAGS.distributed:\n        bcast = hvd.broadcast_global_variables(0)\n\n    with tf.Session() as sess:\n\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        if FLAGS.distributed:\n            bcast.run()\n\n        # Restores trained model if specified\n        saver = tf.train.Saver()\n        if FLAGS.checkpoint:\n            saver.restore(sess, FLAGS.checkpoint)\n\n        iterator.initialize_dataset(sess)\n\n        if FLAGS.do_train:\n            iterator.restart_dataset(sess, 'train')\n            _run(sess, mode='train')\n            saver.save(sess, FLAGS.output_dir + '/model.ckpt')\n\n        if FLAGS.do_eval:\n            iterator.restart_dataset(sess, 'eval')\n            _run(sess, mode='eval')\n\n        if FLAGS.do_test:\n            iterator.restart_dataset(sess, 'test')\n            _run(sess, mode='test')\n\nif __name__ == \"__main__\":\n  
  tf.app.run()\n"
  },
  {
    "path": "texar_repo/examples/bert/bert_config_lib/README.md",
    "content": "### Configuration files of BERT models in Texar style.\n\nFor example, `config_model_uncased_L-12_H-768_A-12.py` is the Texar configuration file equivalent to `uncased_L-12_H-768_A-12` downloaded from [BERT official release](https://github.com/haoransh/texar_private/tree/master/examples/bert).\n"
  },
  {
    "path": "texar_repo/examples/bert/bert_config_lib/__init__.py",
    "content": ""
  },
  {
    "path": "texar_repo/examples/bert/bert_config_lib/config_model_uncased_L-12_H-768_A-12.py",
    "content": "embed = {\n    'dim': 768,\n    'name': 'word_embeddings'\n}\nvocab_size = 30522\n\nsegment_embed = {\n    'dim': 768,\n    'name': 'token_type_embeddings'\n}\ntype_vocab_size = 2\n\nencoder = {\n    'dim': 768,\n    'embedding_dropout': 0.1,\n    'multihead_attention': {\n        'dropout_rate': 0.1,\n        'name': 'self',\n        'num_heads': 12,\n        'num_units': 768,\n        'output_dim': 768,\n        'use_bias': True\n    },\n    'name': 'encoder',\n    'num_blocks': 12,\n    'position_embedder_hparams': {\n        'dim': 768\n    },\n    'position_embedder_type': 'variables',\n    'position_size': 512,\n    'poswise_feedforward': {\n        'layers': [\n            {   'kwargs': {\n                    'activation': 'gelu',\n                    'name': 'intermediate',\n                    'units': 3072,\n                    'use_bias': True\n                },\n                'type': 'Dense'\n            },\n            {   'kwargs': {'activation': None,\n                'name': 'output',\n                'units': 768,\n                'use_bias': True\n                },\n                'type': 'Dense'\n            }\n        ]\n    },\n    'residual_dropout': 0.1,\n    'use_bert_config': True\n}\n\noutput_size = 768 # The output dimension of BERT\n"
  },
  {
    "path": "texar_repo/examples/bert/config_classifier.py",
    "content": "hidden_dim = 768\n\nopt = {\n    'optimizer': {\n        'type': 'AdamWeightDecayOptimizer',\n        'kwargs': {\n            'weight_decay_rate': 0.01,\n            'beta_1': 0.9,\n            'beta_2': 0.999,\n            'epsilon': 1e-6,\n            'exclude_from_weight_decay': ['LayerNorm', 'layer_norm', 'bias']\n        }\n    },\n    'gradient_clip': {\n        'type': 'clip_by_global_norm',\n        'kwargs': {\n            'clip_norm': 1.0,\n        }\n    }\n}\n\n# By default, we use warmup and linear decay for learning rate\nlr = {\n    'static_lr': 2e-5,\n}\n"
  },
  {
    "path": "texar_repo/examples/bert/config_data_mrpc.py",
    "content": "data_dir = 'data/MRPC'\ntrain_batch_size = 32\nmax_seq_length = 128\neval_batch_size = 8\ntest_batch_size = 8\nmax_train_epoch = 3\nwarmup_proportion = 0.1\n"
  },
  {
    "path": "texar_repo/examples/bert/config_data_sst.py",
    "content": "data_dir = 'data/SST-2'\ntrain_batch_size = 32\nmax_seq_length = 128\neval_batch_size = 8\ntest_batch_size = 8\nmax_train_epoch = 3\nwarmup_proportion = 0.1\n"
  },
  {
    "path": "texar_repo/examples/bert/utils/data_utils.py",
    "content": "\"\"\"\nThis is the Data Loading Pipeline for Sentence Classifier Task from\nhttps://github.com/google-research/bert/blob/master/run_classifier.py\n\"\"\"\n# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nimport os\nimport csv\nimport collections\nimport sys\nsys.path.append(os.path.dirname(__file__))\nimport tokenization\nimport tensorflow as tf\n\nclass InputExample():\n    \"\"\"A single training/test example for simple sequence classification.\"\"\"\n\n    def __init__(self, guid, text_a, text_b=None, label=None):\n        \"\"\"Constructs a InputExample.\n        Args:\n            guid: Unique id for the example.\n            text_a: string. The untokenized text of the first sequence.\n                For single sequence tasks, only this sequence must be specified.\n            text_b: (Optional) string. The untokenized text of the second\n                sequence. Only must be specified for sequence pair tasks.\n            label: (Optional) string. The label of the example. 
This should be\n                specified for train and dev examples, but not for test examples.\n        \"\"\"\n        self.guid = guid\n        self.text_a = text_a\n        self.text_b = text_b\n        self.label = label\n\n\nclass InputFeatures():\n    \"\"\"A single set of features of data.\"\"\"\n\n    def __init__(self, input_ids, input_mask, segment_ids, label_id):\n        self.input_ids = input_ids\n        self.input_mask = input_mask\n        self.segment_ids = segment_ids\n        self.label_id = label_id\n\n\nclass DataProcessor(object):\n    \"\"\"Base class for data converters for sequence classification data sets.\"\"\"\n\n    def get_train_examples(self, data_dir):\n        \"\"\"Gets a collection of `InputExample`s for the train set.\"\"\"\n        raise NotImplementedError()\n\n    def get_dev_examples(self, data_dir):\n        \"\"\"Gets a collection of `InputExample`s for the dev set.\"\"\"\n        raise NotImplementedError()\n\n    def get_test_examples(self, data_dir):\n        \"\"\"Gets a collection of `InputExample`s for prediction.\"\"\"\n        raise NotImplementedError()\n\n    def get_labels(self):\n        \"\"\"Gets the list of labels for this data set.\"\"\"\n        raise NotImplementedError()\n\n    @classmethod\n    def _read_tsv(cls, input_file, quotechar=None):\n        \"\"\"Reads a tab separated value file.\"\"\"\n        with tf.gfile.Open(input_file, \"r\") as f:\n            reader = csv.reader(f, delimiter=\"\\t\", quotechar=quotechar)\n            lines = []\n            i = 0\n            for line in reader:\n                lines.append(line)\n        return lines\n\nclass SSTProcessor(DataProcessor):\n    \"\"\"Processor for the MRPC data set (GLUE version).\"\"\"\n\n    def get_train_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_tsv(os.path.join(data_dir, \"train.tsv\")), \"train\")\n\n    def get_dev_examples(self, data_dir):\n     
   \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_tsv(os.path.join(data_dir, \"dev.tsv\")), \"dev\")\n\n    def get_test_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_tsv(os.path.join(data_dir, \"test.tsv\")), \"test\")\n\n    def get_labels(self):\n        \"\"\"See base class.\"\"\"\n        return [\"0\", \"1\"]\n\n    def _create_examples(self, lines, set_type):\n        \"\"\"Creates examples for the training and dev sets.\"\"\"\n        examples = []\n        if set_type == 'train' or set_type == 'dev':\n            for (i, line) in enumerate(lines):\n                if i == 0:\n                    continue\n                guid = \"%s-%s\" % (set_type, i)\n                text_a = tokenization.convert_to_unicode(line[0])\n                # Single sentence classification, text_b doesn't exist\n                text_b = None\n                label = tokenization.convert_to_unicode(line[1])\n                examples.append(InputExample(guid=guid, text_a=text_a,\n                                             text_b=text_b, label=label))\n        if set_type == 'test':\n            for (i, line) in enumerate(lines):\n                if i == 0:\n                    continue\n                guid = \"%s-%s\" % (set_type, i)\n                text_a = tokenization.convert_to_unicode(line[1])\n                # Single sentence classification, text_b doesn't exist\n                text_b = None\n                label = '0' # arbitrary set as 0\n                examples.append(InputExample(guid=guid, text_a=text_a,\n                                             text_b=text_b, label=label))\n        return examples\n\nclass XnliProcessor(DataProcessor):\n    \"\"\"Processor for the XNLI data set.\"\"\"\n\n    def __init__(self):\n        self.language = \"zh\"\n\n    def get_train_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        
lines = self._read_tsv(\n            os.path.join(data_dir, \"multinli\",\n                         \"multinli.train.%s.tsv\" % self.language))\n        examples = []\n        for (i, line) in enumerate(lines):\n            if i == 0:\n                continue\n            guid = \"train-%d\" % (i)\n            text_a = tokenization.convert_to_unicode(line[0])\n            text_b = tokenization.convert_to_unicode(line[1])\n            label = tokenization.convert_to_unicode(line[2])\n            if label == tokenization.convert_to_unicode(\"contradictory\"):\n                label = tokenization.convert_to_unicode(\"contradiction\")\n            examples.append(InputExample(guid=guid, text_a=text_a,\n                                         text_b=text_b, label=label))\n        return examples\n\n    def get_dev_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        lines = self._read_tsv(os.path.join(data_dir, \"xnli.dev.tsv\"))\n        examples = []\n        for (i, line) in enumerate(lines):\n            if i == 0:\n                continue\n            guid = \"dev-%d\" % (i)\n            language = tokenization.convert_to_unicode(line[0])\n            if language != tokenization.convert_to_unicode(self.language):\n                continue\n            text_a = tokenization.convert_to_unicode(line[6])\n            text_b = tokenization.convert_to_unicode(line[7])\n            label = tokenization.convert_to_unicode(line[1])\n            examples.append(InputExample(guid=guid, text_a=text_a,\n                                         text_b=text_b, label=label))\n        return examples\n\n    def get_labels(self):\n        \"\"\"See base class.\"\"\"\n        return [\"contradiction\", \"entailment\", \"neutral\"]\n\nclass MnliProcessor(DataProcessor):\n    \"\"\"Processor for the MultiNLI data set (GLUE version).\"\"\"\n\n    def get_train_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n   
         self._read_tsv(os.path.join(data_dir, \"train.tsv\")), \"train\")\n\n    def get_dev_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_tsv(os.path.join(data_dir, \"dev_matched.tsv\")),\n            \"dev_matched\")\n\n    def get_test_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_tsv(os.path.join(data_dir, \"test_matched.tsv\")),\n            \"test\")\n\n    def get_labels(self):\n        \"\"\"See base class.\"\"\"\n        return [\"contradiction\", \"entailment\", \"neutral\"]\n\n    def _create_examples(self, lines, set_type):\n        \"\"\"Creates examples for the training and dev sets.\"\"\"\n        examples = []\n        for (i, line) in enumerate(lines):\n            if i == 0:\n                continue\n            guid = \"%s-%s\" % (set_type,\n                              tokenization.convert_to_unicode(line[0]))\n            text_a = tokenization.convert_to_unicode(line[8])\n            text_b = tokenization.convert_to_unicode(line[9])\n            if set_type == \"test\":\n                label = \"contradiction\"\n            else:\n                label = tokenization.convert_to_unicode(line[-1])\n            examples.append(InputExample(guid=guid, text_a=text_a,\n                                         text_b=text_b, label=label))\n        return examples\n\nclass MrpcProcessor(DataProcessor):\n    \"\"\"Processor for the MRPC data set (GLUE version).\"\"\"\n\n    def get_train_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_tsv(os.path.join(data_dir, \"train.tsv\")),\n            \"train\")\n\n    def get_dev_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_tsv(os.path.join(data_dir, \"dev.tsv\")),\n            \"dev\")\n\n    
def get_test_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_tsv(os.path.join(data_dir, \"test.tsv\")),\n            \"test\")\n\n    def get_labels(self):\n        \"\"\"See base class.\"\"\"\n        return [\"0\", \"1\"]\n\n    def _create_examples(self, lines, set_type):\n        \"\"\"Creates examples for the training and dev sets.\"\"\"\n        examples = []\n        for (i, line) in enumerate(lines):\n            if i == 0:\n                continue\n            guid = \"%s-%s\" % (set_type, i)\n            text_a = tokenization.convert_to_unicode(line[3])\n            text_b = tokenization.convert_to_unicode(line[4])\n            if set_type == \"test\":\n                label = \"0\"\n            else:\n                label = tokenization.convert_to_unicode(line[0])\n            examples.append(InputExample(guid=guid, text_a=text_a,\n                                         text_b=text_b, label=label))\n        return examples\n\nclass ColaProcessor(DataProcessor):\n    \"\"\"Processor for the CoLA data set (GLUE version).\"\"\"\n\n    def get_train_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_tsv(os.path.join(data_dir, \"train.tsv\")),\n            \"train\")\n\n    def get_dev_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_tsv(os.path.join(data_dir, \"dev.tsv\")),\n            \"dev\")\n\n    def get_test_examples(self, data_dir):\n        \"\"\"See base class.\"\"\"\n        return self._create_examples(\n            self._read_tsv(os.path.join(data_dir, \"test.tsv\")),\n            \"test\")\n\n    def get_labels(self):\n        \"\"\"See base class.\"\"\"\n        return [\"0\", \"1\"]\n\n    def _create_examples(self, lines, set_type):\n        \"\"\"Creates examples for the training and dev sets.\"\"\"\n        
examples = []\n        for (i, line) in enumerate(lines):\n            # Only the test set has a header\n            if set_type == \"test\" and i == 0:\n                continue\n            guid = \"%s-%s\" % (set_type, i)\n            if set_type == \"test\":\n                text_a = tokenization.convert_to_unicode(line[1])\n                label = \"0\"\n            else:\n                text_a = tokenization.convert_to_unicode(line[3])\n                label = tokenization.convert_to_unicode(line[1])\n            examples.append(InputExample(guid=guid, text_a=text_a,\n                                         text_b=None, label=label))\n        return examples\n\n\ndef convert_single_example(ex_index, example, label_list, max_seq_length,\n                           tokenizer):\n    \"\"\"Converts a single `InputExample` into a single `InputFeatures`.\"\"\"\n    label_map = {}\n    for (i, label) in enumerate(label_list):\n        label_map[label] = i\n\n    tokens_a = tokenizer.tokenize(example.text_a)\n    tokens_b = None\n    if example.text_b:\n        tokens_b = tokenizer.tokenize(example.text_b)\n\n    if tokens_b:\n        # Modifies `tokens_a` and `tokens_b` in place so that the total\n        # length is less than the specified length.\n        # Account for [CLS], [SEP], [SEP] with \"- 3\"\n        _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)\n    else:\n        # Account for [CLS] and [SEP] with \"- 2\"\n        if len(tokens_a) > max_seq_length - 2:\n            tokens_a = tokens_a[0:(max_seq_length - 2)]\n\n    # The convention rule is:\n    # (a) For sequence pairs:\n    #   tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]\n    #    segment_ids: 0 0 0 0 0 0 0 0                       1 1 1 1 1 1\n    # (b) For single sequences:\n    #   tokens: [CLS] the dog is hairy . 
[SEP]\n    #   segment_ids: 0 0 0 0 0 0 0\n    #\n    # Where \"segment_ids\" are used to indicate whether this is the first\n    # sequence or the second sequence. The embedding vectors for `type=0` and\n    # `type=1` were learned during pre-training and are added to the wordpiece\n    # embedding vector (and position vector). This is not *strictly* necessary\n    # since the [SEP] token unambiguously separates the sequences, but it makes\n    # it easier for the model to learn the concept of sequences.\n    #\n    # For classification tasks, the first vector (corresponding to [CLS]) is\n    # used as the \"sentence vector\". Note that this only makes sense because\n    # the entire model is fine-tuned.\n    tokens = []\n    segment_ids = []\n    tokens.append(\"[CLS]\")\n    segment_ids.append(0)\n    for token in tokens_a:\n        tokens.append(token)\n        segment_ids.append(0)\n    tokens.append(\"[SEP]\")\n    segment_ids.append(0)\n\n    if tokens_b:\n        for token in tokens_b:\n            tokens.append(token)\n            segment_ids.append(1)\n        tokens.append(\"[SEP]\")\n        segment_ids.append(1)\n\n    input_ids = tokenizer.convert_tokens_to_ids(tokens)\n\n    # The mask has 1 for real tokens and 0 for padding tokens. 
Only real\n    # tokens are attended to.\n    input_mask = [1] * len(input_ids)\n\n    # Zero-pad up to the sequence length.\n    while len(input_ids) < max_seq_length:\n        input_ids.append(0)\n        input_mask.append(0)\n        segment_ids.append(0)\n\n    assert len(input_ids) == max_seq_length\n    assert len(input_mask) == max_seq_length\n    assert len(segment_ids) == max_seq_length\n\n    label_id = label_map[example.label]\n\n    # here we disable the verbose printing of the data\n    if ex_index < 0:\n        tf.logging.info(\"*** Example ***\")\n        tf.logging.info(\"guid: %s\" % (example.guid))\n        tf.logging.info(\"tokens: %s\" % \" \".join(\n            [tokenization.printable_text(x) for x in tokens]))\n        tf.logging.info(\"input_ids: %s\" % \" \".join([str(x) for x in input_ids]))\n        tf.logging.info(\"input_ids length: %d\" % len(input_ids))\n        tf.logging.info(\"input_mask: %s\" %\\\n            \" \".join([str(x) for x in input_mask]))\n        tf.logging.info(\"segment_ids: %s\" %\\\n            \" \".join([str(x) for x in segment_ids]))\n        tf.logging.info(\"label: %s (id = %d)\" % (example.label, label_id))\n\n    feature = InputFeatures(input_ids=input_ids,\n                            input_mask=input_mask,\n                            segment_ids=segment_ids,\n                            label_id=label_id)\n    return feature\n\n\ndef file_based_convert_examples_to_features(\n        examples, label_list, max_seq_length, tokenizer, output_file):\n    \"\"\"Convert a set of `InputExample`s to a TFRecord file.\"\"\"\n\n    writer = tf.python_io.TFRecordWriter(output_file)\n\n    for (ex_index, example) in enumerate(examples):\n\n        feature = convert_single_example(ex_index, example, label_list,\n                                         max_seq_length, tokenizer)\n\n        def create_int_feature(values):\n            return tf.train.Feature(\n                
int64_list=tf.train.Int64List(value=list(values)))\n\n        features = collections.OrderedDict()\n        features[\"input_ids\"] = create_int_feature(feature.input_ids)\n        features[\"input_mask\"] = create_int_feature(feature.input_mask)\n        features[\"segment_ids\"] = create_int_feature(feature.segment_ids)\n        features[\"label_ids\"] = create_int_feature([feature.label_id])\n\n        tf_example = tf.train.Example(\n            features=tf.train.Features(feature=features))\n        writer.write(tf_example.SerializeToString())\n\ndef file_based_input_fn_builder(input_file, seq_length, is_training,\n                                drop_remainder, is_distributed=False):\n    \"\"\"Creates an `input_fn` closure to be passed to TPUEstimator.\"\"\"\n\n    name_to_features = {\n        \"input_ids\": tf.FixedLenFeature([seq_length], tf.int64),\n        \"input_mask\": tf.FixedLenFeature([seq_length], tf.int64),\n        \"segment_ids\": tf.FixedLenFeature([seq_length], tf.int64),\n        \"label_ids\": tf.FixedLenFeature([], tf.int64),\n    }\n\n    def _decode_record(record, name_to_features):\n        \"\"\"Decodes a record to a TensorFlow example.\"\"\"\n        example = tf.parse_single_example(record, name_to_features)\n\n        # tf.Example only supports tf.int64, but the TPU only supports tf.int32.\n        # So cast all int64 to int32.\n        for name in list(example.keys()):\n            t = example[name]\n            if t.dtype == tf.int64:\n                t = tf.to_int32(t)\n            example[name] = t\n\n        return example\n\n    def input_fn(params):\n        \"\"\"The actual input function.\"\"\"\n        batch_size = params[\"batch_size\"]\n\n        # For training, we want a lot of parallel reading and shuffling.\n        # For eval, we want no shuffling and parallel reading doesn't matter.\n        d = tf.data.TFRecordDataset(input_file)\n        if is_training:\n\n            if is_distributed:\n                import 
horovod.tensorflow as hvd\n                tf.logging.info('distributed mode is enabled.'\n                                'size:{} rank:{}'.format(hvd.size(), hvd.rank()))\n                # https://github.com/uber/horovod/issues/223\n                d = d.shard(hvd.size(), hvd.rank())\n\n                d = d.repeat()\n                d = d.shuffle(buffer_size=100)\n                d = d.apply(\n                    tf.contrib.data.map_and_batch(\n                        lambda record: _decode_record(record, name_to_features),\n                        batch_size=batch_size//hvd.size(),\n                        drop_remainder=drop_remainder))\n            else:\n                tf.logging.info('distributed mode is not enabled.')\n                d = d.repeat()\n                d = d.shuffle(buffer_size=100)\n                d = d.apply(\n                    tf.contrib.data.map_and_batch(\n                        lambda record: _decode_record(record, name_to_features),\n                        batch_size=batch_size,\n                        drop_remainder=drop_remainder))\n\n        else:\n            d = d.apply(\n                tf.contrib.data.map_and_batch(\n                    lambda record: _decode_record(record, name_to_features),\n                    batch_size=batch_size,\n                    drop_remainder=drop_remainder))\n\n        return d\n    return input_fn\n\ndef _truncate_seq_pair(tokens_a, tokens_b, max_length):\n    \"\"\"Truncates a sequence pair in place to the maximum length.\"\"\"\n\n    # This is a simple heuristic which will always truncate the longer sequence\n    # one token at a time. 
This makes more sense than truncating an equal\n    # percent of tokens from each, since if one sequence is very short then\n    # each token that's truncated likely contains more information than a\n    # longer sequence.\n    while True:\n        total_length = len(tokens_a) + len(tokens_b)\n        if total_length <= max_length:\n            break\n        if len(tokens_a) > len(tokens_b):\n            tokens_a.pop()\n        else:\n            tokens_b.pop()\n\ndef get_dataset(processor,\n                tokenizer,\n                data_dir,\n                max_seq_length,\n                batch_size,\n                mode,\n                output_dir,\n                is_distributed=False):\n    \"\"\"\n    Args:\n        processor: Data Preprocessor, must have get_labels,\n            get_train/dev/test/examples methods defined.\n        tokenizer: The Sentence Tokenizer. Generally should be\n            SentencePiece Model.\n        data_dir: The input data directory.\n        max_seq_length: Max sequence length.\n        batch_size: mini-batch size.\n        mode: `train`, `eval` or `test`.\n        output_dir: The directory to save the TFRecords in.\n    \"\"\"\n    label_list = processor.get_labels()\n    if mode == 'train':\n        train_examples = processor.get_train_examples(data_dir)\n        train_file = os.path.join(output_dir, \"train.tf_record\")\n        file_based_convert_examples_to_features(\n            train_examples, label_list, max_seq_length,\n            tokenizer, train_file)\n        dataset = file_based_input_fn_builder(\n            input_file=train_file,\n            seq_length=max_seq_length,\n            is_training=True,\n            drop_remainder=True,\n            is_distributed=is_distributed)({'batch_size': batch_size})\n    elif mode == 'eval':\n        eval_examples = processor.get_dev_examples(data_dir)\n        eval_file = os.path.join(output_dir, \"eval.tf_record\")\n        file_based_convert_examples_to_features(\n 
           eval_examples, label_list, max_seq_length, tokenizer, eval_file)\n        dataset = file_based_input_fn_builder(\n            input_file=eval_file,\n            seq_length=max_seq_length,\n            is_training=False,\n            drop_remainder=False)({'batch_size': batch_size})\n    elif mode == 'test':\n        test_examples = processor.get_test_examples(data_dir)\n        test_file = os.path.join(output_dir, \"predict.tf_record\")\n        file_based_convert_examples_to_features(\n            test_examples, label_list, max_seq_length, tokenizer, test_file)\n        dataset = file_based_input_fn_builder(\n            input_file=test_file,\n            seq_length=max_seq_length,\n            is_training=False,\n            drop_remainder=False)({'batch_size': batch_size})\n    return dataset\n"
  },
  {
    "path": "texar_repo/examples/bert/utils/model_utils.py",
    "content": "\"\"\"\nModel utility functions\n\"\"\"\nimport json\nimport collections\nimport re\nimport random\nimport tensorflow as tf\nimport numpy as np\nfrom texar import HParams\n\n\"\"\"\nLoad the Json config file and transform it into Texar style configuration.\n\"\"\"\ndef transform_bert_to_texar_config(input_json):\n    config_ckpt = json.loads(\n        open(input_json).read())\n    configs = {}\n    configs['random_seed'] = 123\n    configs['hidden_size'] = config_ckpt['hidden_size']\n    hidden_dim = config_ckpt['hidden_size']\n    configs['embed'] = {\n        'name': 'word_embeddings',\n        'dim': hidden_dim}\n    configs['vocab_size'] = config_ckpt['vocab_size']\n    configs['segment_embed'] = {\n        'name': 'token_type_embeddings',\n        'dim': hidden_dim}\n    configs['type_vocab_size'] = config_ckpt['type_vocab_size']\n\n    configs['encoder'] = {\n        'name': 'encoder',\n        'position_embedder_type': 'variables',\n        'position_size': config_ckpt['max_position_embeddings'],\n        'position_embedder_hparams': {\n            'dim': hidden_dim,\n        },\n        'embedding_dropout': config_ckpt['hidden_dropout_prob'],\n        'num_blocks': config_ckpt['num_hidden_layers'],\n        'multihead_attention': {\n            'use_bias': True,\n            'num_units': hidden_dim,\n            'num_heads': config_ckpt['num_attention_heads'],\n            'output_dim': hidden_dim,\n            'dropout_rate': config_ckpt['attention_probs_dropout_prob'],\n            'name': 'self'\n        },\n        'residual_dropout': config_ckpt['hidden_dropout_prob'],\n        'dim': hidden_dim,\n        'use_bert_config': True,\n        'poswise_feedforward': {\n            \"layers\": [\n                {\n                    'type': 'Dense',\n                    'kwargs': {\n                        'name': 'intermediate',\n                        'units': config_ckpt['intermediate_size'],\n                        'activation': 
config_ckpt['hidden_act'],\n                        'use_bias': True,\n                    }\n                },\n                {\n                    'type': 'Dense',\n                    'kwargs': {\n                        'name': 'output',\n                        'units': hidden_dim,\n                        'activation': None,\n                        'use_bias': True,\n                    }\n                },\n            ],\n        },\n    }\n    return HParams(configs, default_hparams=None)\n\ndef get_lr(global_step, num_train_steps, num_warmup_steps, static_lr):\n    \"\"\"\n    Calculate the learning rate given global step and warmup steps.\n    The learning rate is following a linear warmup and linear decay.\n    \"\"\"\n    learning_rate = tf.constant(value=static_lr,\n                                shape=[], dtype=tf.float32)\n\n    learning_rate = tf.train.polynomial_decay(\n        learning_rate,\n        global_step,\n        num_train_steps,\n        end_learning_rate=0.0,\n        power=1.0,\n        cycle=False)\n\n    if num_warmup_steps:\n        global_steps_int = tf.cast(global_step, tf.int32)\n        warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)\n\n        global_steps_float = tf.cast(global_steps_int, tf.float32)\n        warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)\n\n        warmup_percent_done = global_steps_float / warmup_steps_float\n        warmup_learning_rate = static_lr * warmup_percent_done\n\n        is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32)\n        learning_rate = ((1.0 - is_warmup) * learning_rate\\\n            +is_warmup * warmup_learning_rate)\n\n    return learning_rate\n\ndef _get_assignment_map_from_checkpoint(tvars, init_checkpoint):\n    \"\"\"\n    Compute the union of the current variables and checkpoint variables.\n    Because the variable scopes of the original BERT and Texar implementations\n    differ, we need to build an assignment map to match the 
variables.\n    \"\"\"\n    assignment_map = {}\n    initialized_variable_names = {}\n\n    name_to_variable = collections.OrderedDict()\n    for var in tvars:\n        name = var.name\n        m = re.match(\"^(.*):\\\\d+$\", name)\n        if m is not None:\n            name = m.group(1)\n        name_to_variable[name] = var\n    init_vars = tf.train.list_variables(init_checkpoint)\n\n    assignment_map = {\n        'bert/embeddings/word_embeddings': 'bert/word_embeddings/w',\n        'bert/embeddings/token_type_embeddings': 'bert/token_type_embeddings/w',\n        'bert/embeddings/position_embeddings':\n            'bert/encoder/position_embedder/w',\n        'bert/embeddings/LayerNorm/beta': 'bert/encoder/LayerNorm/beta',\n        'bert/embeddings/LayerNorm/gamma': 'bert/encoder/LayerNorm/gamma',\n    }\n    for check_name, model_name in assignment_map.items():\n        initialized_variable_names[model_name] = 1\n        initialized_variable_names[model_name + \":0\"] = 1\n\n    for check_name, shape in init_vars:\n        if check_name.startswith('bert'):\n            if check_name.startswith('bert/embeddings'):\n                continue\n            model_name = re.sub(\n                'layer_\\d+/output/dense',\n                lambda x: x.group(0).replace('output/dense', 'ffn/output'),\n                check_name)\n            if model_name == check_name:\n                model_name = re.sub(\n                    'layer_\\d+/output/LayerNorm',\n                    lambda x: x.group(0).replace('output/LayerNorm',\n                                                 'ffn/LayerNorm'),\n                    check_name)\n            if model_name == check_name:\n                model_name = re.sub(\n                    'layer_\\d+/intermediate/dense',\n                    lambda x: x.group(0).replace('intermediate/dense',\n                                                 'ffn/intermediate'),\n                    check_name)\n            if model_name == 
check_name:\n                model_name = re.sub('attention/output/dense',\n                                    'attention/self/output', check_name)\n            if model_name == check_name:\n                model_name = check_name.replace('attention/output/LayerNorm',\n                                                'output/LayerNorm')\n            assert model_name in name_to_variable.keys(),\\\n                'model name:{} not exists!'.format(model_name)\n\n            assignment_map[check_name] = model_name\n            initialized_variable_names[model_name] = 1\n            initialized_variable_names[model_name + \":0\"] = 1\n\n    return (assignment_map, initialized_variable_names)\n\ndef init_bert_checkpoint(init_checkpoint):\n    tvars = tf.trainable_variables()\n    initialized_variable_names = []\n    if init_checkpoint:\n        (assignment_map, initialized_variable_names\n        ) = _get_assignment_map_from_checkpoint(tvars, init_checkpoint)\n        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)\n\ndef set_random_seed(myseed):\n    tf.set_random_seed(myseed)\n    np.random.seed(myseed)\n    random.seed(myseed)\n"
  },
  {
    "path": "texar_repo/examples/bert/utils/tokenization.py",
    "content": "# coding=utf-8\n# Copied from google BERT repo.\n\n# Copyright 2018 The Google AI Language Team Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#         http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Tokenization classes.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport sys\nimport collections\nimport unicodedata\n\nimport tensorflow as tf\n\ndef convert_to_unicode(text):\n    \"\"\"Returns the given argument as a unicode string.\"\"\"\n    return tf.compat.as_text(text)\n\ndef printable_text(text):\n    \"\"\"Returns text encoded in a way suitable for print or `tf.logging`.\"\"\"\n    return tf.compat.as_str_any(text)\n\ndef load_vocab(vocab_file):\n    \"\"\"Loads a vocabulary file into a dictionary.\"\"\"\n    vocab = collections.OrderedDict()\n    index = 0\n    with tf.gfile.GFile(vocab_file, \"r\") as reader:\n        while True:\n            token = tf.compat.as_text(reader.readline())\n            if not token:\n                break\n            token = token.strip()\n            vocab[token] = index\n            index += 1\n    return vocab\n\n\ndef convert_by_vocab(vocab, items):\n    \"\"\"Converts a sequence of [tokens|ids] using the vocab.\"\"\"\n    output = []\n    for item in items:\n        output.append(vocab[item])\n    return output\n\n\ndef convert_tokens_to_ids(vocab, tokens):\n    return convert_by_vocab(vocab, tokens)\n\n\ndef convert_ids_to_tokens(inv_vocab, ids):\n    return 
convert_by_vocab(inv_vocab, ids)\n\n\ndef whitespace_tokenize(text):\n    \"\"\"Runs basic whitespace cleaning and splitting on a piece of text.\"\"\"\n    text = text.strip()\n    if not text:\n        return []\n    tokens = text.split()\n    return tokens\n\n\nclass FullTokenizer(object):\n    \"\"\"Runs end-to-end tokenization.\"\"\"\n\n    def __init__(self, vocab_file, do_lower_case=True):\n        self.vocab = load_vocab(vocab_file)\n        self.inv_vocab = {v: k for k, v in self.vocab.items()}\n        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)\n        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)\n\n    def tokenize(self, text):\n        split_tokens = []\n        for token in self.basic_tokenizer.tokenize(text):\n            for sub_token in self.wordpiece_tokenizer.tokenize(token):\n                split_tokens.append(sub_token)\n\n        return split_tokens\n\n    def convert_tokens_to_ids(self, tokens):\n        return convert_by_vocab(self.vocab, tokens)\n\n    def convert_ids_to_tokens(self, ids):\n        return convert_by_vocab(self.inv_vocab, ids)\n\n\nclass BasicTokenizer(object):\n    \"\"\"Runs basic tokenization (punctuation splitting, lower casing, etc.).\"\"\"\n\n    def __init__(self, do_lower_case=True):\n        \"\"\"Constructs a BasicTokenizer.\n\n        Args:\n            do_lower_case: Whether to lower case the input.\n        \"\"\"\n        self.do_lower_case = do_lower_case\n\n    def tokenize(self, text):\n        \"\"\"Tokenizes a piece of text.\"\"\"\n        text = tf.compat.as_text(text)\n        text = self._clean_text(text)\n\n        # This was added on November 1st, 2018 for the multilingual and Chinese\n        # models. 
This is also applied to the English models now, but it doesn't\n        # matter since the English models were not trained on any Chinese data\n        # and generally don't have any Chinese data in them (there are Chinese\n        # characters in the vocabulary because Wikipedia does have some Chinese\n        # words in the English Wikipedia.).\n        text = self._tokenize_chinese_chars(text)\n\n        orig_tokens = whitespace_tokenize(text)\n        split_tokens = []\n        for token in orig_tokens:\n            if self.do_lower_case:\n                token = token.lower()\n                token = self._run_strip_accents(token)\n            split_tokens.extend(self._run_split_on_punc(token))\n\n        output_tokens = whitespace_tokenize(\" \".join(split_tokens))\n        return output_tokens\n\n    def _run_strip_accents(self, text):\n        \"\"\"Strips accents from a piece of text.\"\"\"\n        text = unicodedata.normalize(\"NFD\", text)\n        output = []\n        for char in text:\n            cat = unicodedata.category(char)\n            if cat == \"Mn\":\n                continue\n            output.append(char)\n        return \"\".join(output)\n\n    def _run_split_on_punc(self, text):\n        \"\"\"Splits punctuation on a piece of text.\"\"\"\n        chars = list(text)\n        i = 0\n        start_new_word = True\n        output = []\n        while i < len(chars):\n            char = chars[i]\n            if _is_punctuation(char):\n                output.append([char])\n                start_new_word = True\n            else:\n                if start_new_word:\n                    output.append([])\n                start_new_word = False\n                output[-1].append(char)\n            i += 1\n\n        return [\"\".join(x) for x in output]\n\n    def _tokenize_chinese_chars(self, text):\n        \"\"\"Adds whitespace around any CJK character.\"\"\"\n        output = []\n        for char in text:\n            cp = ord(char)\n         
   if self._is_chinese_char(cp):\n                output.append(\" \")\n                output.append(char)\n                output.append(\" \")\n            else:\n                output.append(char)\n        return \"\".join(output)\n\n    def _is_chinese_char(self, cp):\n        \"\"\"Checks whether CP is the codepoint of a CJK character.\"\"\"\n        # This defines a \"chinese character\" as anything in the CJK Unicode\n        # block:\n        # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)\n        #\n        # Note that the CJK Unicode block is NOT all Japanese and Korean\n        # characters, despite its name.\n        # The modern Korean Hangul alphabet is a different block,\n        # as is Japanese Hiragana and Katakana. Those alphabets are used to\n        # write space-separated words, so they are not treated specially and\n        # handled like the all of the other languages.\n        if ((cp >= 0x4E00 and cp <= 0x9FFF) or    #\n                (cp >= 0x3400 and cp <= 0x4DBF) or    #\n                (cp >= 0x20000 and cp <= 0x2A6DF) or    #\n                (cp >= 0x2A700 and cp <= 0x2B73F) or    #\n                (cp >= 0x2B740 and cp <= 0x2B81F) or    #\n                (cp >= 0x2B820 and cp <= 0x2CEAF) or\n                (cp >= 0xF900 and cp <= 0xFAFF) or    #\n                (cp >= 0x2F800 and cp <= 0x2FA1F)):    #\n            return True\n\n        return False\n\n    def _clean_text(self, text):\n        \"\"\"Performs invalid character removal and whitespace cleanup on text.\"\"\"\n        output = []\n        for char in text:\n            cp = ord(char)\n            if cp == 0 or cp == 0xfffd or _is_control(char):\n                continue\n            if _is_whitespace(char):\n                output.append(\" \")\n            else:\n                output.append(char)\n        return \"\".join(output)\n\n\nclass WordpieceTokenizer(object):\n    \"\"\"Runs WordPiece tokenziation.\"\"\"\n\n    def 
__init__(self, vocab, unk_token=\"[UNK]\", max_input_chars_per_word=100):\n        self.vocab = vocab\n        self.unk_token = unk_token\n        self.max_input_chars_per_word = max_input_chars_per_word\n\n    def tokenize(self, text):\n        \"\"\"Tokenizes a piece of text into its word pieces.\n\n        This uses a greedy longest-match-first algorithm to perform tokenization\n        using the given vocabulary.\n\n        For example:\n            input = \"unaffable\"\n            output = [\"un\", \"##aff\", \"##able\"]\n        Args:\n            text: A single token or whitespace separated tokens.\n                This should have already been passed through `BasicTokenizer.\n\n        Returns:\n            A list of wordpiece tokens.\n        \"\"\"\n\n        text = tf.compat.as_text(text)\n\n        output_tokens = []\n        for token in whitespace_tokenize(text):\n            chars = list(token)\n            if len(chars) > self.max_input_chars_per_word:\n                output_tokens.append(self.unk_token)\n                continue\n\n            is_bad = False\n            start = 0\n            sub_tokens = []\n            while start < len(chars):\n                end = len(chars)\n                cur_substr = None\n                while start < end:\n                    substr = \"\".join(chars[start:end])\n                    if start > 0:\n                        substr = \"##\" + substr\n                    if substr in self.vocab:\n                        cur_substr = substr\n                        break\n                    end -= 1\n                if cur_substr is None:\n                    is_bad = True\n                    break\n                sub_tokens.append(cur_substr)\n                start = end\n\n            if is_bad:\n                output_tokens.append(self.unk_token)\n            else:\n                output_tokens.extend(sub_tokens)\n        return output_tokens\n\n\ndef _is_whitespace(char):\n    \"\"\"Checks whether 
`chars` is a whitespace character.\"\"\"\n    # \\t, \\n, and \\r are technically control characters but we treat them\n    # as whitespace since they are generally considered as such.\n    if char == \" \" or char == \"\\t\" or char == \"\\n\" or char == \"\\r\":\n        return True\n    cat = unicodedata.category(char)\n    if cat == \"Zs\":\n        return True\n    return False\n\n\ndef _is_control(char):\n    \"\"\"Checks whether `chars` is a control character.\"\"\"\n    # These are technically control characters but we count them as whitespace\n    # characters.\n    if char == \"\\t\" or char == \"\\n\" or char == \"\\r\":\n        return False\n    cat = unicodedata.category(char)\n    if cat.startswith(\"C\"):\n        return True\n    return False\n\n\ndef _is_punctuation(char):\n    \"\"\"Checks whether `chars` is a punctuation character.\"\"\"\n    cp = ord(char)\n    # We treat all non-letter/number ASCII as punctuation.\n    # Characters such as \"^\", \"$\", and \"`\" are not in the Unicode\n    # Punctuation class but we treat them as punctuation anyways, for\n    # consistency.\n    if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or\n            (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):\n        return True\n    cat = unicodedata.category(char)\n    if cat.startswith(\"P\"):\n        return True\n    return False\n"
  },
  {
    "path": "texar_repo/examples/distributed_gpu/README.md",
    "content": "# Model Training with Multi/Distributed GPUs\n\nThis example shows how models built with Texar can be trained with multiple GPUs on single or multiple machines. Multi/Distributed-GPU training is based on the third-party library [Horovod](https://github.com/uber/horovod).\n\nHere we take language model for example, adapting the [single-GPU language model example](https://github.com/asyml/texar/tree/master/examples/language_model_ptb) by adding a few lines of Horovod-related code to enable distributed training (more details below).\n\n## Prerequisites\n\nTwo third-party packages are required:\n\n* `openmpi >= 3.0.0`\n* `horovod`\n\nThe following commands install [OpenMPI](https://www.open-mpi.org) 4.0.0 to the path `/usr/local/openmpi`. Run `mpirun --version` to check the version of the installed OpenMPI.\n```\n# Download and install OpenMPI\nwget https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.0.tar.gz\ntar xvf openmpi-4.0.0.tar.gz\ncd openmpi-4.0.0/\n./configure --prefix=/usr/local/openmpi\nsudo make all install\n\n# Add path of the installed OpenMPI to your system path\nexport PATH=/usr/local/openmpi/bin:$PATH\nexport LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH\n```\n\nThen install Horovod with the cmd:\n```\npip install horovod\n```\n\n## Adapting Single-GPU Code for Distributed Training\n\nBased on the [single-GPU code](https://github.com/asyml/texar/tree/master/examples/language_model_ptb), we made the following adaptations. Note that one processor is created for each GPU.\n\n- Setting up Horovod in the code (click the links below to see the corresponding actual code in `lm_ptb_distributed.py`):\n    1. [`hvd.init()`](https://github.com/asyml/texar/blob/master/examples/distributed_gpu/lm_ptb_distributed.py#L76): initialize Horovod\n    2. [`hvd.DistributedOptimizer`](https://github.com/asyml/texar/blob/master/examples/distributed_gpu/lm_ptb_distributed.py#L131): wrap your optimizer.\n    3. 
[`hvd.broadcast_global_variables(0)`](https://github.com/asyml/texar/blob/master/examples/distributed_gpu/lm_ptb_distributed.py#L191): set the operator to broadcast your global variables to different processes from rank-0 process.\n    4. [set visible GPU list](https://github.com/asyml/texar/blob/master/examples/distributed_gpu/lm_ptb_distributed.py#L194) by `config.gpu_options.visible_device_list = str(hvd.local_rank())`, to make each process see the attached single GPU.\n    5. [run the broadcast node](https://github.com/asyml/texar/blob/master/examples/distributed_gpu/lm_ptb_distributed.py#L203): run the broadcast operator before training\n- Data sharding:\n    1. To make sure different GPUs (processors) receive different data batches in each iteration, we [shard the training data](https://github.com/asyml/texar/blob/master/examples/distributed_gpu/ptb_reader.py#L52) into `N` parts, where `N` is the number of GPUs (processors).\n    2. In this example, `batch_size` in the config files denotes the total batch size in each iteration of all processors. That is, in each iteration, each processor receives `batch_size`/`N` data instances. This replicates the gradients in the single-GPU setting, and we use the same `learning_rate` as in single-GPU.\n\n## Usage ##\n\nRun the following command to train the model with multiple GPUs on multiple machines:\n```\nmpirun -np 2 \\\n    -H  [IP-address-of-server1]:1,[IP-address-of-server2]:1\\\n    -bind-to none -map-by slot \\\n    -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x PATH \\\n    -mca pml ob1 -mca btl tcp,self \\\n    -mca btl_tcp_if_include ens3 \\\n    python lm_ptb_distributed.py --config config_small --data_path ./\n```\n\nHere:\n  * The key configurations for ordinary users:\n  \n      - `-np`: total number of processes\n      - `-H`: IP addresses of different servers and the number of processes used in each server. 
For example, `-H 192.168.11.22:1,192.168.33.44:1`\n  * Other advanced configurations:\n  \n      - `--bind-to none`: specifies OpenMPI to not bind a training process to a single CPU core (which would hurt performance).\n      - `-map-by slot`: allows you to have a mixture of different NUMA configurations because the default behavior is to bind to the socket.\n      - `-x`: specifies (`-x NCCL_DEBUG=INFO`) or copies (`-x LD_LIBRARY_PATH`) an environment variable to all the workers.\n      - `-mca`: sets the MPI communication interface. Use the setting specified above to avoid possible multiprocessing and network communication issues.\n  * Language model configurations:\n      - `--config`: specifies the config file to use. E.g., the above use the configuration defined in config_small.py\n      - `--data_path`: specifies the directory containing PTB raw data (e.g., ptb.train.txt). If the data files do not exist, the program will automatically download, extract, and pre-process the data.\n\nThe model will begin training on the specified GPUs, and evaluate on the validation data periodically. Evaluation on the test data is performed after the training is done. Note that both validation and test are performed only on the rank-0 GPU (i.e., they are not distributed). \n\n## Results ##\n\nWe did simple test on two AWS p2.xlarge instances. \nSince the language model is small and the communication cost is considerable, as expected, the example here doesn't scale very well on 2-GPU 2-machine in terms of speedup rate. The perplexity results of multi-GPU are the same with those of single-GPU.\n\n| config | epochs | train | valid  | test  | time/epoch (2-gpu) | time/epoch (single-gpu) |\n| -------| -------| ------| -------| ------| -----| -----|\n| small  | 13     | 40.81 | 118.99 | 114.72| 207s | 137s |\n| medium | 39     | 44.18 |  87.63 |  84.42| 461s | 311s |\n| large  | 55     | 36.54 |  82.55 |  78.72| 1765s | 931s |\n"
  },
  {
    "path": "texar_repo/examples/distributed_gpu/config_large.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"PTB LM large size config.\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\ninit_scale = 0.04\nnum_epochs = 55\nhidden_size = 1500\nkeep_prob = 0.35\nbatch_size = 20\nnum_steps = 35\n\ncell = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": keep_prob},\n    \"num_layers\": 2\n}\nemb = {\n    \"dim\": hidden_size\n}\nopt = {\n    \"optimizer\": {\n        \"type\": \"GradientDescentOptimizer\",\n        \"kwargs\": {\"learning_rate\": 1.0}\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 10.}\n    },\n    \"learning_rate_decay\": {\n        \"type\": \"exponential_decay\",\n        \"kwargs\": {\n            \"decay_steps\": 1,\n            \"decay_rate\": 1. / 1.15,\n            \"staircase\": True\n        },\n        \"start_decay_step\": 14\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/distributed_gpu/config_medium.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"PTB LM medium size config.\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\ninit_scale = 0.05\nnum_epochs = 39\nhidden_size = 650\nkeep_prob = 0.5\nbatch_size = 20\nnum_steps = 35\n\ncell = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": keep_prob},\n    \"num_layers\": 2\n}\nemb = {\n    \"dim\": hidden_size\n}\nopt = {\n    \"optimizer\": {\n        \"type\": \"GradientDescentOptimizer\",\n        \"kwargs\": {\"learning_rate\": 1.0}\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 5.}\n    },\n    \"learning_rate_decay\": {\n        \"type\": \"exponential_decay\",\n        \"kwargs\": {\n            \"decay_steps\": 1,\n            \"decay_rate\": 0.8,\n            \"staircase\": True\n        },\n        \"start_decay_step\": 5\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/distributed_gpu/config_small.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"PTB LM small size config.\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\ninit_scale = 0.1\nnum_epochs = 13\nhidden_size = 200\nkeep_prob = 1.0\nbatch_size = 20\nnum_steps = 20\n\ncell = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": keep_prob},\n    \"num_layers\": 2\n}\nemb = {\n    \"dim\": hidden_size\n}\nopt = {\n    \"optimizer\": {\n        \"type\": \"GradientDescentOptimizer\",\n        \"kwargs\": {\"learning_rate\": 1.0}\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 5.}\n    },\n    \"learning_rate_decay\": {\n        \"type\": \"exponential_decay\",\n        \"kwargs\": {\n            \"decay_steps\": 1,\n            \"decay_rate\": 0.5,\n            \"staircase\": True\n        },\n        \"start_decay_step\": 3\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/distributed_gpu/lm_ptb_distributed.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Example for building the language model.\n\nThis is a reimpmentation of the TensorFlow official PTB example in:\ntensorflow/models/rnn/ptb\n\nModel and training are described in:\n(Zaremba, et. al.) Recurrent Neural Network Regularization\n http://arxiv.org/abs/1409.2329\n\nThere are 3 provided model configurations:\n===========================================\n| config | epochs | train | valid  | test\n===========================================\n| small  | 13     | 37.99 | 121.39 | 115.91\n| medium | 39     | 48.45 |  86.16 |  82.07\n| large  | 55     | 37.87 |  82.62 |  78.29\nThe exact results may vary depending on the random initialization.\n\nThe data required for this example is in the `data/` dir of the\nPTB dataset from Tomas Mikolov's webpage:\n\n$ wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz\n$ tar xvf simple-examples.tgz\n\nIf data is not provided, the program will download from above automatically.\n\nTo run:\n\n$ python lm_ptb.py --data_path=simple-examples/data --config=config_small\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name, no-member, too-many-locals\n\nimport time\nimport importlib\nimport numpy as np\nimport tensorflow as tf\nimport texar as tx\nimport horovod.tensorflow as hvd\n\nfrom 
ptb_reader import prepare_data, ptb_iterator\n\nflags = tf.flags\n\nflags.DEFINE_string(\"data_path\", \"./\",\n                    \"Directory containing PTB raw data (e.g., ptb.train.txt). \"\n                    \"E.g., ./simple-examples/data. If not exists, \"\n                    \"the directory will be created and PTB raw data will \"\n                    \"be downloaded.\")\nflags.DEFINE_string(\"config\", \"config_small\", \"The config to use.\")\n\nFLAGS = flags.FLAGS\n\nconfig = importlib.import_module(FLAGS.config)\n\ndef _main(_):\n    # Data\n    tf.logging.set_verbosity(tf.logging.INFO)\n\n    ## 1. initialize the horovod\n    hvd.init()\n\n    batch_size = config.batch_size\n    num_steps = config.num_steps\n    data = prepare_data(FLAGS.data_path)\n    vocab_size = data[\"vocab_size\"]\n\n    inputs = tf.placeholder(tf.int32, [None, num_steps],\n                            name='inputs')\n    targets = tf.placeholder(tf.int32, [None, num_steps],\n                             name='targets')\n\n    # Model architecture\n    initializer = tf.random_uniform_initializer(\n        -config.init_scale, config.init_scale)\n    with tf.variable_scope(\"model\", initializer=initializer):\n        embedder = tx.modules.WordEmbedder(\n            vocab_size=vocab_size, hparams=config.emb)\n        emb_inputs = embedder(inputs)\n        if config.keep_prob < 1:\n            emb_inputs = tf.nn.dropout(\n                emb_inputs, tx.utils.switch_dropout(config.keep_prob))\n\n        decoder = tx.modules.BasicRNNDecoder(\n            vocab_size=vocab_size, hparams={\"rnn_cell\": config.cell})\n\n        # This _batch_size equals to batch_size // hvd.size() in\n        # distributed training.\n        # because the mini-batch is distributed to multiple GPUs\n\n        _batch_size = tf.shape(inputs)[0]\n        initial_state = decoder.zero_state(_batch_size,\n                                           tf.float32)\n        seq_length = tf.broadcast_to([num_steps], 
(_batch_size, ))\n        outputs, final_state, seq_lengths = decoder(\n            decoding_strategy=\"train_greedy\",\n            impute_finished=True,\n            inputs=emb_inputs,\n            sequence_length=seq_length,\n            initial_state=initial_state)\n    # Losses & train ops\n    mle_loss = tx.losses.sequence_sparse_softmax_cross_entropy(\n        labels=targets,\n        logits=outputs.logits,\n        sequence_length=seq_lengths)\n\n    # Use global_step to pass epoch, for lr decay\n    global_step = tf.placeholder(tf.int32)\n\n    opt = tx.core.get_optimizer(\n        global_step=global_step,\n        hparams=config.opt\n    )\n\n    # 2. wrap the optimizer\n    opt = hvd.DistributedOptimizer(opt)\n\n    train_op = tx.core.get_train_op(\n        loss=mle_loss,\n        optimizer=opt,\n        global_step=global_step,\n        learning_rate=None,\n        increment_global_step=False,\n        hparams=config.opt\n    )\n\n    def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):\n        start_time = time.time()\n        loss = 0.\n        iters = 0\n\n        fetches = {\n            \"mle_loss\": mle_loss,\n            \"final_state\": final_state,\n        }\n        if is_train:\n            fetches[\"train_op\"] = train_op\n            epoch_size = (len(data[\"train_text_id\"]) // batch_size - 1)\\\n                // num_steps\n\n        mode = (tf.estimator.ModeKeys.TRAIN\n                if is_train\n                else tf.estimator.ModeKeys.EVAL)\n\n\n        for step, (x, y) in enumerate(data_iter):\n            if step == 0:\n                state = sess.run(initial_state,\n                                 feed_dict={inputs: x})\n\n            feed_dict = {\n                inputs: x, targets: y, global_step: epoch,\n                tx.global_mode(): mode,\n            }\n            for i, (c, h) in enumerate(initial_state):\n                feed_dict[c] = state[i].c\n                feed_dict[h] = state[i].h\n\n   
         rets = sess.run(fetches, feed_dict)\n            loss += rets[\"mle_loss\"]\n            state = rets[\"final_state\"]\n            iters += num_steps\n\n            ppl = np.exp(loss / iters)\n            if verbose and is_train and hvd.rank() == 0 \\\n                and (step+1) % (epoch_size // 10) == 0:\n                tf.logging.info(\"%.3f perplexity: %.3f speed: %.0f wps\" %\n                      ((step+1) * 1.0 / epoch_size, ppl,\n                       iters * batch_size / (time.time() - start_time)))\n        _elapsed_time = time.time() - start_time\n        tf.logging.info(\"epoch time elapsed: %f\" % (_elapsed_time))\n        ppl = np.exp(loss / iters)\n        return ppl, _elapsed_time\n\n    # 3. set broadcase global variables from rank-0 process\n    bcast = hvd.broadcast_global_variables(0)\n\n    # 4. set visible GPU\n    session_config = tf.ConfigProto()\n    session_config.gpu_options.visible_device_list = str(hvd.local_rank())\n\n    with tf.Session(config=session_config) as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        # 5. 
run the broadcast_global_variables node before training\n        bcast.run()\n\n        _times = []\n        for epoch in range(config.num_epochs):\n            # Train\n            train_data_iter = ptb_iterator(\n                data[\"train_text_id\"], config.batch_size, num_steps,\n                is_train=True)\n            train_ppl, train_time = _run_epoch(\n                sess, train_data_iter, epoch, is_train=True, verbose=True)\n            _times.append(train_time)\n            tf.logging.info(\"Epoch: %d Train Perplexity: %.3f\" % (epoch, train_ppl))\n            # Valid in the main process\n            if hvd.rank() == 0:\n                valid_data_iter = ptb_iterator(\n                    data[\"valid_text_id\"], config.batch_size, num_steps)\n                valid_ppl, _ = _run_epoch(sess, valid_data_iter, epoch)\n                tf.logging.info(\"Epoch: %d Valid Perplexity: %.3f\"\n                                % (epoch, valid_ppl))\n\n        tf.logging.info('train times: %s' % (_times))\n        tf.logging.info('average train time/epoch %f'\n                        % np.mean(np.array(_times)))\n        # Test in the main process\n        if hvd.rank() == 0:\n            test_data_iter = ptb_iterator(\n                data[\"test_text_id\"], batch_size, num_steps)\n            test_ppl, _ = _run_epoch(sess, test_data_iter, 0)\n            tf.logging.info(\"Test Perplexity: %.3f\" % (test_ppl))\n\nif __name__ == '__main__':\n    tf.app.run(main=_main)\n"
  },
  {
    "path": "texar_repo/examples/distributed_gpu/ptb_reader.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Utilities for preprocessing and iterating over the PTB data.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name, too-many-locals\n\nimport os\nimport numpy as np\n\nimport tensorflow as tf\nimport horovod.tensorflow as hvd\nimport texar as tx\n\ndef ptb_iterator(data, batch_size, num_steps, is_train=False):\n    \"\"\"Iterates through the ptb data.\n    \"\"\"\n\n    data_length = len(data)\n\n    batch_length = data_length // batch_size\n    data = np.asarray(data[:batch_size*batch_length])\n    data = data.reshape([batch_size, batch_length])\n\n    epoch_size = (batch_length - 1) // num_steps\n    if epoch_size == 0:\n        raise ValueError(\"epoch_size == 0, decrease batch_size or num_steps\")\n\n    def _sharded_data(data):\n        _batch_size = len(data)\n        _shard_size = _batch_size // hvd.size()\n        data = [data[i*_shard_size: (i+1) * _shard_size]\n                for i in range(_shard_size)]\n        data = data[hvd.rank()]\n        return data\n\n    if is_train:\n        # split the dataset into shards to make sure\n        # different processed are loaded with different training data\n        data = _sharded_data(data)\n\n    for i in range(epoch_size):\n        x = data[:, i * num_steps : (i+1) * num_steps]\n 
       y = data[:, i * num_steps + 1 : (i+1) * num_steps + 1]\n        yield (x, y)\n\ndef prepare_data(data_path):\n    \"\"\"Preprocess PTB data.\n    \"\"\"\n    train_path = os.path.join(data_path, \"ptb.train.txt\")\n    if not tf.gfile.Exists(train_path):\n        url = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'\n        tx.data.maybe_download(url, data_path, extract=True)\n        data_path = os.path.join(data_path, 'simple-examples', 'data')\n\n    train_path = os.path.join(data_path, \"ptb.train.txt\")\n    valid_path = os.path.join(data_path, \"ptb.valid.txt\")\n    test_path = os.path.join(data_path, \"ptb.test.txt\")\n\n    word_to_id = tx.data.make_vocab(\n        train_path, newline_token=\"<EOS>\", return_type=\"dict\")\n    assert len(word_to_id) == 10000\n\n    train_text = tx.data.read_words(\n        train_path, newline_token=\"<EOS>\")\n    train_text_id = [word_to_id[w] for w in train_text if w in word_to_id]\n\n    valid_text = tx.data.read_words(\n        valid_path, newline_token=\"<EOS>\")\n    valid_text_id = [word_to_id[w] for w in valid_text if w in word_to_id]\n\n    test_text = tx.data.read_words(\n        test_path, newline_token=\"<EOS>\")\n    test_text_id = [word_to_id[w] for w in test_text if w in word_to_id]\n\n    data = {\n        \"train_text\": train_text,\n        \"valid_text\": valid_text,\n        \"test_text\": test_text,\n        \"train_text_id\": train_text_id,\n        \"valid_text_id\": valid_text_id,\n        \"test_text_id\": test_text_id,\n        \"vocab\": word_to_id,\n        \"vocab_size\": len(word_to_id)\n    }\n    return data\n"
  },
  {
    "path": "texar_repo/examples/hierarchical_dialog/README.md",
    "content": "# Hierarchical Recurrent Encoder-Decoder (HRED) Dialogue Model\n\nThis example builds a HRED dialogue model described in [(Serban et al. 2016) Building End-To-End Dialogue Systems Using Generative Hierarchical Neural Network Models](https://arxiv.org/abs/1507.04808). \n\nThe dataset used here is provided by [(Zhao et al. 2017) Learning Discourse-level Diversity for Neural Dialog Models using Conditional Variational Autoencoders](https://arxiv.org/abs/1703.10960), which adapts [switchboard-1 Release 2](https://catalog.ldc.upenn.edu/ldc97s62). In particular, for evaluation purpose, multiple reference responses for each dialog context in the test set are collected through manual annotations. \n\nThis example demonstrates:\n* Use of `MultiAlignedData` to read parallel data with multiple fields, e.g., (source, target, meta, ...)\n* Use of the `'variable_utterance'` hyperparameter in TextData to read dialog history data.\n* Use of the `'embedding_init'` hyperparameter in TextData to read pre-trained word embedding as initialization. \n* Use of `HierarchicalRNNEncoder` to encode dialog history with utterance-level and word-level encoding.\n* Use of *beam search decoding* and *random sample decoding* at inference time. \n* Addition of speaker meta-data in the encoder-decoder model.\n\n## Usage\n\n### Dataset\n\nDownload and preprocess the data with the following cmd:\n```\npython sw_loader.py\n```\n* Train/dev/test sets contain 200K, 5K, 5K examples, respectively.\n* Vocab size is 10,000.\n* `./data/switchboard/embedding.txt` contains word embeddings extracted from [glove.twitter.27B.200d](https://nlp.stanford.edu/projects/glove). You can also directly use the original glove.twitter.27B.200d file, and the Texar TextData module will automatically extract relevant embeddings for the vocabulary. 
\n\n### Train the model\n\nTo train the model, run\n\n```\npython hred.py --config_data config_data --config_model config_model_biminor \n```\nEvaluation will be performed after each epoch. \n\nHere:\n* `--config_data` specifies the data configuration.\n* `--config_model` specifies the model configuration. Note not to include the `.py` suffix. Two configs are provided:\n  - [biminor.py](./config_model_biminor.py) uses a bi-directional RNN as the word-level (minor-level) encoder\n  - [uniminor.py](./config_model_uniminor.py) uses a uni-directional RNN as the word-level (minor-level) encoder\n\nBoth configs use a uni-directional RNN for the utterance-level (major-level) encoder\n\n## Results\n\nThe table shows results of perplexity and BLEU after 10 epochs, comparing the results of [(Zhao et al. 2017)](https://arxiv.org/abs/1703.10960) (See \"Baseline\" of Table.1 in the paper). Note that:\n* We report results of random sample decoding, which performs slightly better than beam search decoding. \n* `num_samples` is the number of samples generated for each test instances (for computing precision and recall of BLEU). See sec.5.2 of the paper for the definition of the metrics.\n* (Zhao et al. 
2017) uses more meta data besides the speaker meta-data here.\n* Results may vary a bit due to randomness.\n\n|               | biminor<br>num_samples=10   | biminor<br>num_samples=5 | Zhao et al.<br>num_samples=5 |\n| --------------| ---------------| --------------| --------------|\n| Perplexity    | 23.79          | 24.26         | 35.4   |\n| BLEU-1 recall | 0.478          | 0.386         | 0.405  |\n| BLEU-1 prec   | 0.379          | 0.395         | 0.336  |\n| BLEU-2 recall | 0.391          | 0.319         | 0.300  |\n| BLEU-2 prec   | 0.310          | 0.324         | 0.281  |\n| BLEU-3 recall | 0.330          | 0.270         | 0.272  |\n| BLEU-3 prec   | 0.259          | 0.272         | 0.254  |\n| BLEU-4 recall | 0.262          | 0.216         | 0.226  |\n| BLEU-4 prec   | 0.204          | 0.215         | 0.215  |\n"
  },
  {
    "path": "texar_repo/examples/hierarchical_dialog/config_data.py",
    "content": "import os\n\ndata_root = './data'\nmax_utterance_cnt = 9\n\ndata_hparams = {\n    stage: {\n        \"num_epochs\": 1,\n        \"shuffle\": stage != 'test',\n        \"batch_size\": 30,\n        \"datasets\": [\n            { # source\n                \"variable_utterance\": True,\n                \"max_utterance_cnt\": max_utterance_cnt,\n                \"files\": [\n                    os.path.join(data_root,\n                                 '{}-source.txt'.format(stage))],\n                \"vocab_file\": os.path.join(data_root, 'vocab.txt'),\n                \"embedding_init\": {\n                    \"file\": os.path.join(data_root, 'embedding.txt'),\n                    \"dim\": 200,\n                    \"read_fn\": \"load_glove\"\n                },\n                \"data_name\": \"source\"\n            },\n            { # target\n                \"files\": [\n                    os.path.join(data_root, '{}-target.txt'.format(stage))],\n                \"vocab_share_with\": 0,\n                \"data_name\": \"target\"\n            },\n        ] + [{ # source speaker token\n                \"files\": os.path.join(data_root,\n                                      '{}-source-spk-{}.txt'.format(stage, i)),\n                \"data_type\": \"float\",\n                \"data_name\": \"spk_{}\".format(i)\n            } for i in range(max_utterance_cnt)\n        ] + [{ # target speaker token\n                \"files\": os.path.join(data_root,\n                                      '{}-target-spk.txt'.format(stage)),\n                \"data_type\": \"float\",\n                \"data_name\": \"spk_tgt\"\n            }\n        ] + [{ # target refs for BLEU evaluation\n                \"variable_utterance\": True,\n                \"max_utterance_cnt\": 10,\n                \"files\": [os.path.join(data_root,\n                                       '{}-target-refs.txt'.format(stage))],\n                \"vocab_share_with\": 0,\n                
\"data_name\": \"refs\"\n            }]\n    }\n    for stage in ['train', 'val', 'test']\n}\n"
  },
  {
    "path": "texar_repo/examples/hierarchical_dialog/config_model_biminor.py",
    "content": "\nimport tensorflow as tf\n\nnum_samples = 10 # Number of samples generated for each test data instance\nbeam_width = num_samples\n\nencoder_hparams = {\n    \"encoder_minor_type\": \"BidirectionalRNNEncoder\",\n    \"encoder_minor_hparams\": {\n        \"rnn_cell_fw\": {\n            \"type\": \"GRUCell\",\n            \"kwargs\": {\n                \"num_units\": 300,\n                \"kernel_initializer\": tf.initializers.random_uniform(-0.08, 0.08)\n            },\n            \"dropout\": {\n                \"input_keep_prob\": 0.5,\n            }\n        },\n        \"rnn_cell_share_config\": True\n    },\n    \"encoder_major_type\": \"UnidirectionalRNNEncoder\",\n    \"encoder_major_hparams\": {\n        \"rnn_cell\": {\n            \"type\": \"GRUCell\",\n            \"kwargs\": {\n                \"num_units\": 600,\n                \"kernel_initializer\": tf.initializers.random_uniform(-0.08, 0.08)\n            },\n            \"dropout\": {\n                \"output_keep_prob\": 0.3\n            }\n        }\n    }\n}\ndecoder_hparams = {\n    \"rnn_cell\": {\n        \"type\": \"GRUCell\",\n        \"kwargs\": {\n            \"num_units\": 400,\n            \"kernel_initializer\": tf.initializers.random_uniform(-0.08, 0.08),\n        },\n        \"dropout\": {\n            \"input_keep_prob\": 0.3\n        }\n    }\n}\nopt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            \"learning_rate\": 0.001,\n        }\n    },\n    ## (It looks gradient clip does not affect the results a lot)\n    #\"gradient_clip\": {\n    #    \"type\": \"clip_by_global_norm\",\n    #    \"kwargs\": {\"clip_norm\": 5.}\n    #},\n}\n"
  },
  {
    "path": "texar_repo/examples/hierarchical_dialog/config_model_uniminor.py",
    "content": "\nimport tensorflow as tf\n\nnum_samples = 10 # Number of samples generated for each test data instance\nbeam_width = num_samples\n\nencoder_hparams = {\n    \"encoder_minor_type\": \"UnidirectionalRNNEncoder\",\n    \"encoder_minor_hparams\": {\n        \"rnn_cell\": {\n            \"type\": \"GRUCell\",\n            \"kwargs\": {\n                \"num_units\": 300,\n                \"kernel_initializer\": tf.initializers.random_uniform(-0.08, 0.08)\n            },\n            \"dropout\": {\n                \"input_keep_prob\": 0.5,\n            }\n        },\n    },\n    \"encoder_major_type\": \"UnidirectionalRNNEncoder\",\n    \"encoder_major_hparams\": {\n        \"rnn_cell\": {\n            \"type\": \"GRUCell\",\n            \"kwargs\": {\n                \"num_units\": 600,\n                \"kernel_initializer\": tf.initializers.random_uniform(-0.08, 0.08)\n            },\n            \"dropout\": {\n                \"input_keep_prob\": 0.3,\n            }\n        }\n    }\n}\ndecoder_hparams = {\n    \"rnn_cell\": {\n        \"type\": \"GRUCell\",\n        \"kwargs\": {\n            \"num_units\": 400,\n            \"kernel_initializer\": tf.initializers.random_uniform(-0.08, 0.08),\n        },\n        \"dropout\": {\n            \"output_keep_prob\": 0.3,\n        }\n    }\n}\nopt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            \"learning_rate\": 0.001,\n        }\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/hierarchical_dialog/hred.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Hierarchical Recurrent Encoder-Decoder (HRED) for dialog response\ngeneration.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name, too-many-locals\n\nimport importlib\nimport numpy as np\nimport tensorflow as tf\nimport texar as tx\n\nfrom nltk.translate.bleu_score import sentence_bleu\nfrom nltk.translate.bleu_score import SmoothingFunction\n\nflags = tf.flags\n\nflags.DEFINE_string('config_data', 'config_data', 'The data config')\nflags.DEFINE_string('config_model', 'config_model_biminor', 'The model config')\n\nFLAGS = flags.FLAGS\n\nconfig_data = importlib.import_module(FLAGS.config_data)\nconfig_model = importlib.import_module(FLAGS.config_model)\n\nencoder_hparams = config_model.encoder_hparams\ndecoder_hparams = config_model.decoder_hparams\nopt_hparams = config_model.opt_hparams\n\ndef main():\n    \"\"\"Entrypoint.\n    \"\"\"\n    # Data\n    train_data = tx.data.MultiAlignedData(config_data.data_hparams['train'])\n    val_data = tx.data.MultiAlignedData(config_data.data_hparams['val'])\n    test_data = tx.data.MultiAlignedData(config_data.data_hparams['test'])\n    iterator = tx.data.TrainTestDataIterator(train=train_data,\n                                             val=val_data,\n                                         
    test=test_data)\n    data_batch = iterator.get_next()\n\n    # (speaker's meta info)\n    spk_src = tf.stack([data_batch['spk_{}'.format(i)]\n                        for i in range(config_data.max_utterance_cnt)], 1)\n    spk_tgt = data_batch['spk_tgt']\n    def _add_source_speaker_token(x):\n        return tf.concat([x, tf.reshape(spk_src, (-1, 1))], 1)\n    def _add_target_speaker_token(x):\n        return (x, ) + (tf.reshape(spk_tgt, (-1, 1)), )\n\n    # HRED model\n    embedder = tx.modules.WordEmbedder(\n        init_value=train_data.embedding_init_value(0).word_vecs)\n    encoder = tx.modules.HierarchicalRNNEncoder(hparams=encoder_hparams)\n\n    decoder = tx.modules.BasicRNNDecoder(\n        hparams=decoder_hparams, vocab_size=train_data.vocab(0).size)\n\n    connector = tx.modules.connectors.MLPTransformConnector(\n        decoder.cell.state_size)\n\n    context_embed = embedder(data_batch['source_text_ids'])\n    ecdr_states = encoder(\n        context_embed,\n        medium=['flatten', _add_source_speaker_token],\n        sequence_length_minor=data_batch['source_length'],\n        sequence_length_major=data_batch['source_utterance_cnt'])\n    ecdr_states = ecdr_states[1]\n\n    ecdr_states = _add_target_speaker_token(ecdr_states)\n    dcdr_states = connector(ecdr_states)\n\n    # (decoding for training)\n    target_embed = embedder(data_batch['target_text_ids'])\n    outputs, _, lengths = decoder(\n        initial_state=dcdr_states,\n        inputs=target_embed,\n        sequence_length=data_batch['target_length'] - 1)\n\n    # Sentence level lld, for training\n    mle_loss = tx.losses.sequence_sparse_softmax_cross_entropy(\n        labels=data_batch['target_text_ids'][:, 1:],\n        logits=outputs.logits,\n        sequence_length=lengths)\n    # Token level lld, for perplexity evaluation\n    avg_mle_loss = tx.losses.sequence_sparse_softmax_cross_entropy(\n        labels=data_batch['target_text_ids'][:, 1:],\n        logits=outputs.logits,\n        
sequence_length=lengths,\n        sum_over_timesteps=False,\n        average_across_timesteps=True)\n    perplexity = tf.exp(avg_mle_loss)\n\n    global_step = tf.Variable(0, name='global_step', trainable=True)\n    train_op = tx.core.get_train_op(\n        mle_loss, global_step=global_step, hparams=opt_hparams)\n\n    # Decoding\n\n    target_bos_token_id = train_data.vocab(0).bos_token_id\n    target_eos_token_id = train_data.vocab(0).eos_token_id\n    start_tokens = \\\n        tf.ones_like(data_batch['target_length']) * target_bos_token_id\n\n    # Random sample decoding\n    decoding_strategy = 'infer_' + 'sample'\n    infer_samples, lengths = [], []\n    for _ in range(config_model.num_samples):\n        infer_outputs_i, _, lengths_i = decoder(\n            decoding_strategy=decoding_strategy,\n            initial_state=dcdr_states,\n            start_tokens=start_tokens,\n            end_token=target_eos_token_id,\n            embedding=embedder,\n            max_decoding_length=50)\n        infer_samples.append(\n            tf.expand_dims(infer_outputs_i.sample_id, axis=2))\n        lengths.append(tf.expand_dims(lengths_i, axis=1))\n\n    infer_samples = tx.utils.pad_and_concat(\n        infer_samples, axis=2, pad_axis=1)\n    rand_sample_text = train_data.vocab(0).map_ids_to_tokens(infer_samples)\n    rand_lengths = tf.concat(lengths, axis=1)\n\n    # Beam search decoding\n    beam_search_samples, beam_states, _ = tx.modules.beam_search_decode(\n        decoder,\n        initial_state=dcdr_states,\n        start_tokens=start_tokens,\n        end_token=target_eos_token_id,\n        embedding=embedder,\n        beam_width=config_model.beam_width,\n        max_decoding_length=50)\n\n    beam_sample_text = train_data.vocab(0).map_ids_to_tokens(\n        beam_search_samples.predicted_ids)\n    beam_lengths = beam_states.lengths\n\n    # Running procedures\n\n    def _train_epoch(sess, epoch, display=1000):\n        iterator.switch_to_train_data(sess)\n\n       
 while True:\n            try:\n                feed = {tx.global_mode(): tf.estimator.ModeKeys.TRAIN}\n                step, loss, _ = sess.run(\n                    [global_step, mle_loss, train_op], feed_dict=feed)\n\n                if step % display == 0:\n                    print('step {} at epoch {}: loss={}'.format(\n                        step, epoch, loss))\n\n            except tf.errors.OutOfRangeError:\n                break\n\n        print('epoch {} train: loss={}'.format(epoch, loss))\n\n    def _test_epoch_ppl(sess, epoch):\n        iterator.switch_to_test_data(sess)\n\n        pples = []\n        while True:\n            try:\n                feed = {tx.global_mode(): tf.estimator.ModeKeys.EVAL}\n                ppl = sess.run(perplexity, feed_dict=feed)\n                pples.append(ppl)\n\n            except tf.errors.OutOfRangeError:\n                avg_ppl = np.mean(pples)\n                print('epoch {} perplexity={}'.format(epoch, avg_ppl))\n                break\n\n    def _test_epoch_bleu(sess, epoch, sample_text, sample_lengths):\n        iterator.switch_to_test_data(sess)\n\n        bleu_prec = [[] for i in range(1, 5)]\n        bleu_recall = [[] for i in range(1, 5)]\n\n        def _bleus(ref, sample):\n            res = []\n            for weight in [[1, 0, 0, 0],\n                           [1, 0, 0, 0],\n                           [1/2., 1/2., 0, 0],\n                           [1/3., 1/3., 1/3., 0],\n                           [1/4., 1/4., 1/4., 1/4.]]:\n                res.append(sentence_bleu(\n                    [ref],\n                    sample,\n                    smoothing_function=SmoothingFunction().method7,\n                    weights=weight))\n            return res\n\n        while True:\n            try:\n                feed = {tx.global_mode(): tf.estimator.ModeKeys.EVAL}\n\n                samples_, sample_lengths_, references, refs_cnt = \\\n                    sess.run([sample_text,\n                         
     sample_lengths,\n                              data_batch['refs_text'][:, :, 1:],\n                              data_batch['refs_utterance_cnt']],\n                             feed_dict=feed)\n\n                samples_ = np.transpose(samples_, (0, 2, 1))\n                samples_ = [\n                    [sample[:l] for sample, l in zip(beam, lens)]\n                    for beam, lens in zip(samples_.tolist(), sample_lengths_)\n                ]\n                references = [\n                    [ref[:ref.index(b'<EOS>')] for ref in refs[:cnt]]\n                    for refs, cnt in zip(references.tolist(), refs_cnt)\n                ]\n\n                for beam, refs in zip(samples_, references):\n                    bleu_scores = [\n                        [_bleus(ref, sample) for ref in refs]\n                        for sample in beam\n                    ]\n                    bleu_scores = np.transpose(np.array(bleu_scores), (2, 0, 1))\n\n                    for i in range(1, 5):\n                        bleu_i = bleu_scores[i]\n                        bleu_i_precision = bleu_i.max(axis=1).mean()\n                        bleu_i_recall = bleu_i.max(axis=0).mean()\n\n                        bleu_prec[i-1].append(bleu_i_precision)\n                        bleu_recall[i-1].append(bleu_i_recall)\n\n\n            except tf.errors.OutOfRangeError:\n                break\n\n        bleu_prec = [np.mean(x) for x in bleu_prec]\n        bleu_recall = [np.mean(x) for x in bleu_recall]\n\n        print('epoch {}:'.format(epoch))\n        for i in range(1, 5):\n            print(' -- bleu-{} prec={}, recall={}'.format(\n                i, bleu_prec[i-1], bleu_recall[i-1]))\n\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        num_epochs = 10\n        for epoch in range(1, num_epochs+1):\n            _train_epoch(sess, epoch)\n 
           _test_epoch_ppl(sess, epoch)\n\n            if epoch % 5 == 0:\n                print('random sample: ')\n                _test_epoch_bleu(sess, epoch, rand_sample_text, rand_lengths)\n                print('beam-search: ')\n                _test_epoch_bleu(sess, epoch, beam_sample_text, beam_lengths)\n\n        if num_epochs % 5 != 0:\n            print('random sample: ')\n            _test_epoch_bleu(sess, num_epochs, rand_sample_text, rand_lengths)\n            print('beam-search: ')\n            _test_epoch_bleu(sess, num_epochs, beam_sample_text, beam_lengths)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "texar_repo/examples/hierarchical_dialog/sw_loader.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\" loader for switch board dataset.\n\"\"\"\nimport os\nimport json\nfrom json_lines import reader\n\nfrom nltk.tokenize import WordPunctTokenizer\nfrom sklearn.feature_extraction.text import TfidfVectorizer\n\nimport texar as tx\n\nfrom config_data import data_root\n\n# pylint: disable=invalid-name, too-many-locals\n\nwnd_sz = 10\n\nclass Dataset(object):\n    \"\"\"Data preprocessor.\n    \"\"\"\n\n    def __init__(self, jsonl_path, mode=None):\n        self.mode = mode\n        self.raw = []\n        self.lst = []\n        self.refs = []\n        if mode == 'test':\n            lst = json.load(open(jsonl_path, 'r'))\n            for item in lst:\n                context = item['context']\n                dialog = []\n                for utts in context:\n                    p = utts.find(':')\n                    dialog.append((\n                        (utts[p-1] == 'A') * 2 - 1, utts[p + 2:-1], 0))\n\n                if dialog[0][1][-1] == '>':\n                    dialog = dialog[1:]\n\n                if len(dialog) == 0:\n                    continue\n\n                responses = []\n                for resp in item['responses']:\n                    responses.append(resp)\n\n                spk = (item['speaker'] == 'A') * 2 - 1\n                dialog.append((spk, responses[0], 0))\n                responses = responses[1:]\n 
               responses = [' '.join(WordPunctTokenizer().tokenize(resp))\n                             for resp in responses]\n\n                if len(responses) == 0:\n                    continue\n\n                self.raw.append(dialog)\n                self.lst.append((len(self.raw) - 1, 0, len(dialog)))\n                self.refs.append(responses)\n\n            return\n\n        from collections import Counter\n        self.ct = Counter()\n        self.topics = []\n        with open(jsonl_path, 'r') as f:\n            for idx, item in enumerate(reader(f)):\n                utts = item['utts']\n                self.topics.append(item['topic'])\n                self.raw.append([(int(speaker == 'A') * 2 - 1, sentence, _)\n                                 for speaker, sentence, _ in utts])\n\n                lst = [(idx, start, start + wnd_sz)\n                       for start in range(0, len(utts)-wnd_sz)] + \\\n                      [(idx, 0, end)\n                       for end in range(2, min(wnd_sz+1, len(utts)))]\n\n                self.lst += lst\n\n        self.refs = [['none']] * len(self.lst)\n\n    def __len__(self):\n        return len(self.lst)\n\n    def __getitem__(self, idx):\n        idx, start, end = self.lst[idx]\n        dialog = self.raw[idx][start:end]\n        source, target = dialog[:-1], dialog[-1]\n\n        spks, utts = list(zip(*[(speaker, WordPunctTokenizer().tokenize(uttr)) for speaker, uttr, _ in source]))\n\n        spks = list(spks)\n\n        while len(spks) < 10:\n            spks.append(0)\n\n        source = '|||'.join([' '.join(uttr) for uttr in utts])\n        target_test = ' '.join(WordPunctTokenizer().tokenize(target[1]))\n\n        return spks, source, target_test, target[0]\n\n    def get(self, idx):\n        idx, start, end = self.lst[idx]\n        source = self.raw[idx][start:end-1]\n        target = self.raw[idx][end-1]\n        source = ' '.join([b for a, b, c in source])\n        cct = self.raw[idx][end-2][0] == 
self.raw[idx][end-1][0]\n        return self.topics[idx], cct, source, target\n\ndef sw1c2r(data_root):\n    dts_train = Dataset(os.path.join(data_root, 'train.jsonl'))\n    dts_valid = Dataset(os.path.join(data_root, 'valid.jsonl'))\n    dts_test = Dataset(os.path.join(data_root, 'test_multi_ref.json'), 'test')\n    datasets = {\n        'train': dts_train,\n        'val': dts_valid,\n        'test': dts_test\n    }\n    return datasets\n\ndef generate_reference_for_test_dialog(dataset, data_root):\n    vocab = {}\n    with open(os.path.join(data_root, 'vocab.txt'), 'r') as f:\n        p = f.read().splitlines()\n        for i, x in enumerate(p):\n            vocab[x] = i\n\n    dts_train = dataset['train']\n    dts_val = dataset['val']\n    dts_test = dataset['test']\n\n    vectorizer = TfidfVectorizer(tokenizer=WordPunctTokenizer().tokenize,\n                                 vocabulary=vocab)\n\n    saved = []\n    meta = []\n    data = []\n    tidx = {}\n    for i in range(len(dts_test)):\n        topic, cct, source, target = dts_test.get(i)\n        meta.append((topic, cct, target))\n        data.append(source)\n\n    for i in range(len(dts_train)):\n        topic, cct, source, target = dts_train.get(i)\n        saved.append((topic, cct, target))\n        data.append(source)\n\n        if topic not in tidx:\n            tidx[topic] = []\n        tidx[topic].append(i)\n\n    result = vectorizer.fit_transform(data)\n    x = result[:len(dts_test)]\n    y = result[len(dts_test):]\n\n    from tqdm import tqdm\n    from sklearn.preprocessing import normalize\n\n    y = normalize(y)\n    x = normalize(x)\n\n    dts_test.refs = []\n    for i in tqdm(range(len(dts_test))):\n        c = tidx[meta[i][0]]\n        p = (y * x[i].T).toarray().reshape(-1)[c]\n        d = p.argsort()\n\n        cnt = 0\n        refs = []\n        for a in d[::-1]:\n            if saved[a][1] == meta[i][1]:\n                refs.append(' '.join(\n                    
WordPunctTokenizer().tokenize(saved[a][2][1])))\n                cnt += 1\n                if cnt == 10:\n                    break\n\n        dts_test.refs.append(refs)\n\ndef download_and_process(data_root):\n    if not os.path.isdir(data_root):\n        os.makedirs(data_root)\n        os.makedirs(os.path.join(data_root, 'raw'))\n\n        tx.data.maybe_download(\n            urls='https://drive.google.com/file/d/1Gytd-SSetUkIY6aVVKNrBOxkHjAlSGeU/view?usp=sharing',\n            path='./',\n            filenames=os.path.join(data_root, 'sw1c2r.tar.gz'),\n            extract=True)\n\n        os.system('mv {} {}'.format(os.path.join(data_root, 'sw1c2r.tar.gz'),\n                                    os.path.join(data_root, 'raw/sw1c2r.tar.gz')))\n        os.system('mv {}/* {}'.format(\n            os.path.join(data_root, 'switchboard'), data_root))\n\n        datasets = sw1c2r(os.path.join(data_root, 'json_data'))\n\n        for stage in ['train', 'val', 'test']:\n            dts = datasets[stage]\n            spk, src, tgt, meta = list(zip(*[dts[i] for i in range(len(dts))]))\n            src_txt = '\\n'.join(src)\n            tgt_txt = '\\n'.join(tgt)\n\n            spk = list(zip(*spk))\n\n            for i in range(len(spk)):\n                with open(os.path.join(data_root, '{}-source-spk-{}.txt'.format(stage, i)), 'w') as f:\n                    f.write('\\n'.join([str(a) for a in spk[i]]))\n\n            spk_tgt = meta\n\n            with open(os.path.join(data_root, '{}-target-spk.txt'.format(stage)), 'w') as f:\n                f.write('\\n'.join([str(a) for a in spk_tgt]))\n\n            with open(os.path.join(data_root, '{}-source.txt'.format(stage)), 'w') as f:\n                f.write(src_txt)\n            with open(os.path.join(data_root, '{}-target.txt'.format(stage)), 'w') as f:\n                f.write(tgt_txt)\n\n            with open(os.path.join(data_root, '{}-target-refs.txt'.format(stage)), 'w') as f:\n                
f.write('\\n'.join(['|||'.join(v) for v in dts.refs]))\n\nif __name__ == '__main__':\n    download_and_process(data_root)\n"
  },
  {
    "path": "texar_repo/examples/language_model_ptb/README.md",
    "content": "# Language Model on PTB #\n\nThis example builds an LSTM language model, and trains on PTB data. Model and training are described in   \n[(Zaremba, et. al.) Recurrent Neural Network Regularization](https://arxiv.org/pdf/1409.2329.pdf). This is a reimplementation of the TensorFlow official PTB example in [tensorflow/models/rnn/ptb](https://github.com/tensorflow/models/tree/master/tutorials/rnn/ptb).\n\nThe example shows:\n  * Construction of simple model, involving the `Embedder` and `RNN Decoder`.\n  * Use of Texar with external Python data pipeline ([ptb_reader.py](./ptb_reader.py)).\n  * Specification of various features of train op, like *gradient clipping* and *lr decay*.\n\n## Usage ##\n\nThe following cmd trains a small-size model:\n\n```\npython lm_ptb.py [--config config_small] [--data_path ./]\n```\n\nHere:\n  * `--config` specifies the config file to use. E.g., the above uses the configuration defined in [config_small.py](./config_small.py)\n  * `--data_path` specifies the directory containing PTB raw data (e.g., `ptb.train.txt`). If the data files do not exist, the program will automatically download, extract, and pre-process the data.\n\nThe model will begin training, and will evaluate on the validation data periodically, and evaluate on the test data after the training is done. \n\n## Results ##\n\nAs per the TensorFlow official PTB example, the perplexity of different configs is:\n\n| config | epochs | train | valid  | test  |\n| -------| -------| ------| -------| ------|\n| small  | 13     | 37.99 | 121.39 | 115.91|\n| medium | 39     | 48.45 |  86.16 |  82.07|\n| large  | 55     | 37.87 |  82.62 |  78.29|\n"
  },
  {
    "path": "texar_repo/examples/language_model_ptb/config_large.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"PTB LM large size config.\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\ninit_scale = 0.04\nnum_epochs = 55\nhidden_size = 1500\nkeep_prob = 0.35\nbatch_size = 20\nnum_steps = 35\n\ncell = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": keep_prob},\n    \"num_layers\": 2\n}\nemb = {\n    \"dim\": hidden_size\n}\nopt = {\n    \"optimizer\": {\n        \"type\": \"GradientDescentOptimizer\",\n        \"kwargs\": {\"learning_rate\": 1.0}\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 10.}\n    },\n    \"learning_rate_decay\": {\n        \"type\": \"exponential_decay\",\n        \"kwargs\": {\n            \"decay_steps\": 1,\n            \"decay_rate\": 1. / 1.15,\n            \"staircase\": True\n        },\n        \"start_decay_step\": 14\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/language_model_ptb/config_medium.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"PTB LM medium size config.\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\ninit_scale = 0.05\nnum_epochs = 39\nhidden_size = 650\nkeep_prob = 0.5\nbatch_size = 20\nnum_steps = 35\n\ncell = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": keep_prob},\n    \"num_layers\": 2\n}\nemb = {\n    \"dim\": hidden_size\n}\nopt = {\n    \"optimizer\": {\n        \"type\": \"GradientDescentOptimizer\",\n        \"kwargs\": {\"learning_rate\": 1.0}\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 5.}\n    },\n    \"learning_rate_decay\": {\n        \"type\": \"exponential_decay\",\n        \"kwargs\": {\n            \"decay_steps\": 1,\n            \"decay_rate\": 0.8,\n            \"staircase\": True\n        },\n        \"start_decay_step\": 5\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/language_model_ptb/config_small.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"PTB LM small size config.\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\ninit_scale = 0.1\nnum_epochs = 13\nhidden_size = 200\nkeep_prob = 1.0\nbatch_size = 20\nnum_steps = 20\n\ncell = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": keep_prob},\n    \"num_layers\": 2\n}\nemb = {\n    \"dim\": hidden_size\n}\nopt = {\n    \"optimizer\": {\n        \"type\": \"GradientDescentOptimizer\",\n        \"kwargs\": {\"learning_rate\": 1.0}\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 5.}\n    },\n    \"learning_rate_decay\": {\n        \"type\": \"exponential_decay\",\n        \"kwargs\": {\n            \"decay_steps\": 1,\n            \"decay_rate\": 0.5,\n            \"staircase\": True\n        },\n        \"start_decay_step\": 3\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/language_model_ptb/lm_ptb.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Example for building the language model.\n\nThis is a reimpmentation of the TensorFlow official PTB example in:\ntensorflow/models/rnn/ptb\n\nModel and training are described in:\n(Zaremba, et. al.) Recurrent Neural Network Regularization\n http://arxiv.org/abs/1409.2329\n\nThere are 3 provided model configurations:\n===========================================\n| config | epochs | train | valid  | test\n===========================================\n| small  | 13     | 37.99 | 121.39 | 115.91\n| medium | 39     | 48.45 |  86.16 |  82.07\n| large  | 55     | 37.87 |  82.62 |  78.29\nThe exact results may vary depending on the random initialization.\n\nThe data required for this example is in the `data/` dir of the\nPTB dataset from Tomas Mikolov's webpage:\n\n$ wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz\n$ tar xvf simple-examples.tgz\n\nIf data is not provided, the program will download from above automatically.\n\nTo run:\n\n$ python lm_ptb.py --data_path=simple-examples/data --config=config_small\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name, no-member, too-many-locals\n\nimport time\nimport importlib\nimport numpy as np\nimport tensorflow as tf\nimport texar as tx\n\nfrom ptb_reader import prepare_data, 
ptb_iterator\n\nflags = tf.flags\n\nflags.DEFINE_string(\"data_path\", \"./\",\n                    \"Directory containing PTB raw data (e.g., ptb.train.txt). \"\n                    \"E.g., ./simple-examples/data. If not exists, \"\n                    \"the directory will be created and PTB raw data will \"\n                    \"be downloaded.\")\nflags.DEFINE_string(\"config\", \"config_small\", \"The config to use.\")\n\nFLAGS = flags.FLAGS\n\nconfig = importlib.import_module(FLAGS.config)\n\ndef _main(_):\n    # Data\n    batch_size = config.batch_size\n    num_steps = config.num_steps\n    data = prepare_data(FLAGS.data_path)\n    vocab_size = data[\"vocab_size\"]\n\n    inputs = tf.placeholder(tf.int32, [batch_size, num_steps])\n    targets = tf.placeholder(tf.int32, [batch_size, num_steps])\n\n    # Model architecture\n    initializer = tf.random_uniform_initializer(\n        -config.init_scale, config.init_scale)\n    with tf.variable_scope(\"model\", initializer=initializer):\n        embedder = tx.modules.WordEmbedder(\n            vocab_size=vocab_size, hparams=config.emb)\n        emb_inputs = embedder(inputs)\n        if config.keep_prob < 1:\n            emb_inputs = tf.nn.dropout(\n                emb_inputs, tx.utils.switch_dropout(config.keep_prob))\n\n        decoder = tx.modules.BasicRNNDecoder(\n            vocab_size=vocab_size, hparams={\"rnn_cell\": config.cell})\n        initial_state = decoder.zero_state(batch_size, tf.float32)\n        outputs, final_state, seq_lengths = decoder(\n            decoding_strategy=\"train_greedy\",\n            impute_finished=True,\n            inputs=emb_inputs,\n            sequence_length=[num_steps]*batch_size,\n            initial_state=initial_state)\n\n    # Losses & train ops\n    mle_loss = tx.losses.sequence_sparse_softmax_cross_entropy(\n        labels=targets,\n        logits=outputs.logits,\n        sequence_length=seq_lengths)\n\n    # Use global_step to pass epoch, for lr decay\n    
global_step = tf.placeholder(tf.int32)\n    train_op = tx.core.get_train_op(\n        mle_loss, global_step=global_step, increment_global_step=False,\n        hparams=config.opt)\n\n    def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):\n        start_time = time.time()\n        loss = 0.\n        iters = 0\n        state = sess.run(initial_state)\n\n        fetches = {\n            \"mle_loss\": mle_loss,\n            \"final_state\": final_state,\n        }\n        if is_train:\n            fetches[\"train_op\"] = train_op\n            epoch_size = (len(data[\"train_text_id\"]) // batch_size - 1)\\\n                // num_steps\n\n        mode = (tf.estimator.ModeKeys.TRAIN\n                if is_train\n                else tf.estimator.ModeKeys.EVAL)\n\n        for step, (x, y) in enumerate(data_iter):\n            feed_dict = {\n                inputs: x, targets: y, global_step: epoch,\n                tx.global_mode(): mode,\n            }\n            for i, (c, h) in enumerate(initial_state):\n                feed_dict[c] = state[i].c\n                feed_dict[h] = state[i].h\n\n            rets = sess.run(fetches, feed_dict)\n            loss += rets[\"mle_loss\"]\n            state = rets[\"final_state\"]\n            iters += num_steps\n\n            ppl = np.exp(loss / iters)\n            if verbose and is_train and step % (epoch_size // 10) == 10:\n                print(\"%.3f perplexity: %.3f speed: %.0f wps\" %\n                      ((step+1) * 1.0 / epoch_size, ppl,\n                       iters * batch_size / (time.time() - start_time)))\n\n        ppl = np.exp(loss / iters)\n        return ppl\n\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        for epoch in range(config.num_epochs):\n            # Train\n            train_data_iter = ptb_iterator(\n                
data[\"train_text_id\"], config.batch_size, num_steps)\n            train_ppl = _run_epoch(\n                sess, train_data_iter, epoch, is_train=True, verbose=True)\n            print(\"Epoch: %d Train Perplexity: %.3f\" % (epoch, train_ppl))\n            # Valid\n            valid_data_iter = ptb_iterator(\n                data[\"valid_text_id\"], config.batch_size, num_steps)\n            valid_ppl = _run_epoch(sess, valid_data_iter, epoch)\n            print(\"Epoch: %d Valid Perplexity: %.3f\" % (epoch, valid_ppl))\n        # Test\n        test_data_iter = ptb_iterator(\n            data[\"test_text_id\"], batch_size, num_steps)\n        test_ppl = _run_epoch(sess, test_data_iter, 0)\n        print(\"Test Perplexity: %.3f\" % (test_ppl))\n\nif __name__ == '__main__':\n    tf.app.run(main=_main)\n"
  },
  {
    "path": "texar_repo/examples/language_model_ptb/ptb_reader.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Utilities for preprocessing and iterating over the PTB data.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name, too-many-locals\n\nimport os\nimport numpy as np\n\nimport tensorflow as tf\n\nimport texar as tx\n\ndef ptb_iterator(data, batch_size, num_steps):\n    \"\"\"Iterates through the ptb data.\n    \"\"\"\n    data_length = len(data)\n    batch_length = data_length // batch_size\n\n    data = np.asarray(data[:batch_size*batch_length])\n    data = data.reshape([batch_size, batch_length])\n\n    epoch_size = (batch_length - 1) // num_steps\n    if epoch_size == 0:\n        raise ValueError(\"epoch_size == 0, decrease batch_size or num_steps\")\n\n    for i in range(epoch_size):\n        x = data[:, i * num_steps : (i+1) * num_steps]\n        y = data[:, i * num_steps + 1 : (i+1) * num_steps + 1]\n        yield (x, y)\n\ndef prepare_data(data_path):\n    \"\"\"Preprocess PTB data.\n    \"\"\"\n    train_path = os.path.join(data_path, \"ptb.train.txt\")\n    if not tf.gfile.Exists(train_path):\n        url = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'\n        tx.data.maybe_download(url, data_path, extract=True)\n        data_path = os.path.join(data_path, 'simple-examples', 'data')\n\n    train_path = 
os.path.join(data_path, \"ptb.train.txt\")\n    valid_path = os.path.join(data_path, \"ptb.valid.txt\")\n    test_path = os.path.join(data_path, \"ptb.test.txt\")\n\n    word_to_id = tx.data.make_vocab(\n        train_path, newline_token=\"<EOS>\", return_type=\"dict\")\n    assert len(word_to_id) == 10000\n\n    train_text = tx.data.read_words(\n        train_path, newline_token=\"<EOS>\")\n    train_text_id = [word_to_id[w] for w in train_text if w in word_to_id]\n\n    valid_text = tx.data.read_words(\n        valid_path, newline_token=\"<EOS>\")\n    valid_text_id = [word_to_id[w] for w in valid_text if w in word_to_id]\n\n    test_text = tx.data.read_words(\n        test_path, newline_token=\"<EOS>\")\n    test_text_id = [word_to_id[w] for w in test_text if w in word_to_id]\n\n    data = {\n        \"train_text\": train_text,\n        \"valid_text\": valid_text,\n        \"test_text\": test_text,\n        \"train_text_id\": train_text_id,\n        \"valid_text_id\": valid_text_id,\n        \"test_text_id\": test_text_id,\n        \"vocab\": word_to_id,\n        \"vocab_size\": len(word_to_id)\n    }\n    return data\n"
  },
  {
    "path": "texar_repo/examples/memory_network_lm/README.md",
    "content": "# End-to-End Memory Network for Language Modeling #\n\nThis example builds a Memory Network language model, and trains on PTB data. Model and training are described in   \n[(Sukhbaatar, et. al.) End-To-End Memory Networks](https://arxiv.org/pdf/1503.08895v4.pdf). Model details are implemented in `texar.modules.memnet`.\n\nThough the example is for language modeling, it is easy to adapt to other tasks like Question Answering, etc, as described in the above paper.\n\n## Dataset ##\n\nThe standard [Penn Treebank (PTB) dataset](http://www.fit.vutbr.cz/~imikolov/rnnlm/) is used. \n\nIf data does not exist under `data_path`, the program will automatically download the data. \n\n## Usage ##\n\nThe following cmd trains the model:\n\n```bash\npython3 lm_ptb_memnet.py --config config --data_path ./\n```\n\nHere:\n  * `--config` specifies the config file to use. E.g., the above use the configuration defined in [config.py](./config.py).\n  * `--data_path` specifies the directory containing PTB raw data (e.g., `ptb.train.txt`). If the data files do not exist, the program will automatically download, extract, and pre-process the data.\n  * `--lr` specifies the initial learning rate. If not specified, the program will use the learning rate in the config file.\n\nThe model will begin training, and will evaluate on the validation data periodically, and evaluate on the test data after the training is done. Checkpoints are saved every 5 epochs.\n\n## Configurations ##\n\n[config.py](./config.py) is the largest and best configuration described on the last line of Table 2 in [(Sukhbaatar, et. al.) End-To-End Memory Networks](https://arxiv.org/pdf/1503.08895v4.pdf). It sets number of hops to 7, hidden dim to 150, and memory size to 200. 
This model has 4,582,500 parameters in total.\n\n## Results ##\n\nThe perplexity of different configs is:\n\n| config        | epochs | train | valid  | test  |\n| ------------- | -------| ------| -------| ------|\n| config        | 51     | 50.70 | 120.97 | 113.06|\n\nThis result of `config.py` is slightly inferior to the result presented in the paper, since the result in the paper is the best among 10 runs.\n"
  },
  {
    "path": "texar_repo/examples/memory_network_lm/config.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\nn_hops = 7\ndim = 150\nrelu_dim = dim // 2\nbatch_size = 128\nnum_epochs = 200\nmemory_size = 200\ninitialize_stddev = 0.05\nquery_constant = 0.1\nlearning_rate_anneal_factor = 1.5\nterminating_learning_rate = 1e-5\n\nopt = {\n    \"optimizer\": {\n        \"type\": \"GradientDescentOptimizer\",\n        \"kwargs\": {\"learning_rate\": 0.01}\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 50.}\n    },\n}\n\nembed = {\n    \"embedding\": {\n        \"dim\": dim,\n    },\n    \"temporal_embedding\": {\n        \"dim\": dim,\n    }\n}\n\nmemnet = {\n    \"n_hops\": n_hops,\n    \"relu_dim\": relu_dim,\n    \"memory_size\": memory_size,\n    \"A\": embed,\n    \"C\": embed,\n}\n"
  },
  {
    "path": "texar_repo/examples/memory_network_lm/lm_ptb_memnet.py",
    "content": "#!/usr/bin/env python3\n# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Example for building the PTB language model with Memory Network.\n\nMemory Network model is described in https://arxiv.org/abs/1503.08895v4\n\nThe data required for this example is in the `data/` dir of the\nPTB dataset from Tomas Mikolov's webpage:\n\n$ wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz\n$ tar xvf simple-examples.tgz\n\nIf data is now provided, the program will download from above automatically.\n\nTo run:\n\n$ python lm_ptb_memnet.py --data_path=simple-examples/data \\\n  --config=config\n\nThis code will automatically save and restore from directory `ckpt/`.\nIf the directory doesn't exist, it will be created automatically.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name, no-member, too-many-locals\n\nimport importlib\nimport numpy as np\nimport tensorflow as tf\nimport texar as tx\n\nfrom ptb_reader import prepare_data\nfrom ptb_reader import ptb_iterator_memnet as ptb_iterator\n\nflags = tf.flags\n\nflags.DEFINE_string(\"data_path\", \"./\",\n                    \"Directory containing PTB raw data (e.g., ptb.train.txt). \"\n                    \"E.g., ./simple-examples/data. 
If not exists, \"\n                    \"the directory will be created and PTB raw data will \"\n                    \"be downloaded.\")\nflags.DEFINE_string(\"config\", \"config\", \"The config to use.\")\n\nFLAGS = flags.FLAGS\n\nconfig = importlib.import_module(FLAGS.config)\n\ndef _main(_):\n    # Data\n    batch_size = config.batch_size\n    memory_size = config.memory_size\n    terminating_learning_rate = config.terminating_learning_rate\n    data = prepare_data(FLAGS.data_path)\n    vocab_size = data[\"vocab_size\"]\n    print('vocab_size = {}'.format(vocab_size))\n\n    inputs = tf.placeholder(tf.int32, [None, memory_size], name=\"inputs\")\n    targets = tf.placeholder(tf.int32, [None], name=\"targets\")\n\n    # Model architecture\n    initializer = tf.random_normal_initializer(\n        stddev=config.initialize_stddev)\n    with tf.variable_scope(\"model\", initializer=initializer):\n        memnet = tx.modules.MemNetRNNLike(raw_memory_dim=vocab_size,\n                                          hparams=config.memnet)\n        queries = tf.fill([tf.shape(inputs)[0], config.dim],\n                          config.query_constant)\n        logits = memnet(inputs, queries)\n\n    # Losses & train ops\n    mle_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(\n        labels=targets, logits=logits)\n    mle_loss = tf.reduce_sum(mle_loss)\n\n    # Use global_step to pass epoch, for lr decay\n    lr = config.opt[\"optimizer\"][\"kwargs\"][\"learning_rate\"]\n    learning_rate = tf.placeholder(tf.float32, [], name=\"learning_rate\")\n    global_step = tf.Variable(0, dtype=tf.int32, name=\"global_step\")\n    increment_global_step = tf.assign_add(global_step, 1)\n    train_op = tx.core.get_train_op(\n        mle_loss,\n        learning_rate=learning_rate,\n        global_step=global_step,\n        increment_global_step=False,\n        hparams=config.opt)\n\n    def _run_epoch(sess, data_iter, epoch, is_train=False):\n        loss = 0.\n        iters = 0\n\n    
    fetches = {\n            \"mle_loss\": mle_loss\n        }\n        if is_train:\n            fetches[\"train_op\"] = train_op\n\n        mode = (tf.estimator.ModeKeys.TRAIN\n                if is_train\n                else tf.estimator.ModeKeys.EVAL)\n\n        for _, (x, y) in enumerate(data_iter):\n            batch_size = x.shape[0]\n            feed_dict = {\n                inputs: x, targets: y, learning_rate: lr,\n                tx.global_mode(): mode,\n            }\n\n            rets = sess.run(fetches, feed_dict)\n            loss += rets[\"mle_loss\"]\n            iters += batch_size\n\n        ppl = np.exp(loss / iters)\n        return ppl\n\n    saver = tf.train.Saver()\n\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        try:\n            saver.restore(sess, \"ckpt/model.ckpt\")\n            print('restored checkpoint.')\n        except:\n            print('restore checkpoint failed.')\n\n        last_valid_ppl = None\n        heuristic_lr_decay = (hasattr(config, 'heuristic_lr_decay')\n                              and config.heuristic_lr_decay)\n        while True:\n            if lr < terminating_learning_rate:\n                break\n\n            epoch = sess.run(global_step)\n            if epoch >= config.num_epochs:\n                print('Too many epochs!')\n                break\n\n            print('epoch: {} learning_rate: {:.6f}'.format(epoch, lr))\n\n            # Train\n            train_data_iter = ptb_iterator(\n                data[\"train_text_id\"], batch_size, memory_size)\n            train_ppl = _run_epoch(\n                sess, train_data_iter, epoch, is_train=True)\n            print(\"Train Perplexity: {:.3f}\".format(train_ppl))\n            sess.run(increment_global_step)\n\n            # checkpoint\n            if epoch % 5 == 0:\n                try:\n                    
saver.save(sess, \"ckpt/model.ckpt\")\n                    print(\"saved checkpoint.\")\n                except:\n                    print(\"save checkpoint failed.\")\n\n            # Valid\n            valid_data_iter = ptb_iterator(\n                data[\"valid_text_id\"], batch_size, memory_size)\n            valid_ppl = _run_epoch(sess, valid_data_iter, epoch)\n            print(\"Valid Perplexity: {:.3f}\".format(valid_ppl))\n\n            # Learning rate decay\n            if last_valid_ppl:\n                if heuristic_lr_decay:\n                    if valid_ppl > last_valid_ppl * config.heuristic_threshold:\n                        lr /= 1. + (valid_ppl / last_valid_ppl \\\n                                    - config.heuristic_threshold) \\\n                                   * config.heuristic_rate\n                    last_valid_ppl = last_valid_ppl \\\n                                     * (1 - config.heuristic_smooth_rate) \\\n                                     + valid_ppl * config.heuristic_smooth_rate\n                else:\n                    if valid_ppl > last_valid_ppl:\n                        lr /= config.learning_rate_anneal_factor\n                    last_valid_ppl = valid_ppl\n            else:\n                last_valid_ppl = valid_ppl\n            print(\"last_valid_ppl: {:.6f}\".format(last_valid_ppl))\n\n        epoch = sess.run(global_step)\n        print('Terminate after epoch ', epoch)\n\n        # Test\n        test_data_iter = ptb_iterator(data[\"test_text_id\"], 1, memory_size)\n        test_ppl = _run_epoch(sess, test_data_iter, 0)\n        print(\"Test Perplexity: {:.3f}\".format(test_ppl))\n\nif __name__ == '__main__':\n    tf.app.run(main=_main)\n\n"
  },
  {
    "path": "texar_repo/examples/memory_network_lm/ptb_reader.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Utilities for preprocessing and iterating over the PTB data.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name, too-many-locals\n\nimport os\nimport numpy as np\n\nimport tensorflow as tf\n\nimport texar as tx\n\ndef ptb_iterator(data, batch_size, num_steps):\n    \"\"\"Iterates through the ptb data.\n    \"\"\"\n    data_length = len(data)\n    batch_length = data_length // batch_size\n\n    data = np.asarray(data[:batch_size*batch_length])\n    data = data.reshape([batch_size, batch_length])\n\n    epoch_size = (batch_length - 1) // num_steps\n    if epoch_size == 0:\n        raise ValueError(\"epoch_size == 0, decrease batch_size or num_steps\")\n\n    for i in range(epoch_size):\n        x = data[:, i * num_steps : (i+1) * num_steps]\n        y = data[:, i * num_steps + 1 : (i+1) * num_steps + 1]\n        yield (x, y)\n\ndef ptb_iterator_memnet(data, batch_size, memory_size):\n    \"\"\"Iterates through the ptb data.\n    \"\"\"\n    data_length = len(data)\n    length = data_length - memory_size\n    order = list(range(length))\n    np.random.shuffle(order)\n\n    data = np.asarray(data)\n\n    for i in range(0, length, batch_size):\n        x, y = [], []\n        for j in range(i, min(i + batch_size, length)):\n            idx 
= order[j]\n            x.append(data[idx : idx + memory_size])\n            y.append(data[idx + memory_size])\n        x, y = np.asarray(x), np.asarray(y)\n        yield (x, y)\n\ndef prepare_data(data_path):\n    \"\"\"Preprocess PTB data.\n    \"\"\"\n    train_path = os.path.join(data_path, \"ptb.train.txt\")\n    if not tf.gfile.Exists(train_path):\n        url = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'\n        tx.data.maybe_download(url, data_path, extract=True)\n        data_path = os.path.join(data_path, 'simple-examples', 'data')\n\n    train_path = os.path.join(data_path, \"ptb.train.txt\")\n    valid_path = os.path.join(data_path, \"ptb.valid.txt\")\n    test_path = os.path.join(data_path, \"ptb.test.txt\")\n\n    word_to_id = tx.data.make_vocab(\n        train_path, newline_token=\"<EOS>\", return_type=\"dict\")\n    assert len(word_to_id) == 10000\n\n    train_text = tx.data.read_words(\n        train_path, newline_token=\"<EOS>\")\n    train_text_id = [word_to_id[w] for w in train_text if w in word_to_id]\n\n    valid_text = tx.data.read_words(\n        valid_path, newline_token=\"<EOS>\")\n    valid_text_id = [word_to_id[w] for w in valid_text if w in word_to_id]\n\n    test_text = tx.data.read_words(\n        test_path, newline_token=\"<EOS>\")\n    test_text_id = [word_to_id[w] for w in test_text if w in word_to_id]\n\n    data = {\n        \"train_text\": train_text,\n        \"valid_text\": valid_text,\n        \"test_text\": test_text,\n        \"train_text_id\": train_text_id,\n        \"valid_text_id\": valid_text_id,\n        \"test_text_id\": test_text_id,\n        \"vocab\": word_to_id,\n        \"vocab_size\": len(word_to_id)\n    }\n    return data\n"
  },
  {
    "path": "texar_repo/examples/rl_gym/README.md",
    "content": "# Reinforcement Learning for Games #\n\nThis example implements three RL algorithms for the Cartpole game based on the OpenAI Gym environment:\n* [pg_cartpole.py](./pg_cartpole.py) uses Policy Gradient\n* [dqn_cartpole.py](./dqn_cartpole.py) uses Deep-Q\n* [ac_cartpole.py](./ac_cartpole.py) uses Actor-critic\n\nThe example is for demonstrating the Texar RL APIs (for games), and only implements the most basic versions of respective algorithms.\n\n## Usage ##\n\nRun the following cmd to start training:\n\n```\npython pg_cartpole.py --config config \npython dqn_cartpole.py --config config \npython ac_cartpole.py --config config \n```\n"
  },
  {
    "path": "texar_repo/examples/rl_gym/ac_cartpole.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nPolicy gradient for the CartPole game in OpenAI gym.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name\n\nimport importlib\nimport gym\nimport tensorflow as tf\nimport texar as tx\n\nflags = tf.flags\n\nflags.DEFINE_string(\"config\", \"config\", \"The config to use.\")\n\nFLAGS = flags.FLAGS\n\nconfig = importlib.import_module(FLAGS.config)\n\nif __name__ == '__main__':\n    env = gym.make('CartPole-v0')\n    env = env.unwrapped\n\n    env_config = tx.agents.get_gym_env_config(env)\n\n    agent = tx.agents.ActorCriticAgent(env_config=env_config)\n    with tf.Session() as sess:\n        agent.sess = sess\n\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        feed_dict = {tx.global_mode(): tf.estimator.ModeKeys.TRAIN}\n\n        for e in range(5000):\n            reward_sum = 0.\n            observ = env.reset()\n            agent.reset()\n            while True:\n                action = agent.get_action(observ, feed_dict=feed_dict)\n\n                next_observ, reward, terminal, _ = env.step(action=action)\n                agent.observe(reward, terminal, feed_dict=feed_dict)\n                observ = next_observ\n\n         
       reward_sum += reward\n                if terminal:\n                    break\n\n            if (e + 1) % 10 == 0:\n                print('episode {}: {}'.format(e + 1, reward_sum))\n"
  },
  {
    "path": "texar_repo/examples/rl_gym/config.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nCartpole config.\n\"\"\"\n\n# pylint: disable=invalid-name\n\npolicy_hparams = None # Use default hyperparameters\n\npg_agent_hparams = {\n    \"policy_hparams\": policy_hparams,\n    \"normalize_reward\": True\n}\n"
  },
  {
    "path": "texar_repo/examples/rl_gym/dqn_cartpole.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nPolicy gradient for the CartPole game in OpenAI gym.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name\n\nimport importlib\nimport gym\nimport tensorflow as tf\nimport texar as tx\nfrom texar.agents import PGAgent\n\n\nflags = tf.flags\n\nflags.DEFINE_string(\"config\", \"config\", \"The config to use.\")\n\nFLAGS = flags.FLAGS\n\nconfig = importlib.import_module(FLAGS.config)\n\n\nif __name__ == '__main__':\n    env = gym.make('CartPole-v0')\n    env = env.unwrapped\n\n    env_config = tx.agents.get_gym_env_config(env)\n\n    with tf.Session() as sess:\n        agent = tx.agents.DQNAgent(sess=sess, env_config=env_config)\n\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        feed_dict = {tx.global_mode(): tf.estimator.ModeKeys.TRAIN}\n\n        for e in range(500):\n            reward_sum = 0.\n            observ = env.reset()\n            agent.reset()\n            while True:\n                action = agent.get_action(observ, feed_dict=feed_dict)\n\n                next_observ, reward, terminal, _ = env.step(action=action)\n                agent.observe(reward, terminal, feed_dict=feed_dict)\n                observ = 
next_observ\n\n                reward_sum += reward\n                if terminal:\n                    break\n\n            if (e + 1) % 10 == 0:\n                print('episode {}: {}'.format(e + 1, reward_sum))\n"
  },
  {
    "path": "texar_repo/examples/rl_gym/pg_cartpole.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nPolicy gradient for the CartPole game in OpenAI gym.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name\n\nimport importlib\nimport gym\nimport tensorflow as tf\nimport texar as tx\nfrom texar.agents import PGAgent\n\nflags = tf.flags\n\nflags.DEFINE_string(\"config\", \"config\", \"The config to use.\")\n\nFLAGS = flags.FLAGS\n\nconfig = importlib.import_module(FLAGS.config)\n\n\ndef _main(_):\n    env = gym.make('CartPole-v0')\n    env = env.unwrapped\n\n    env_config = tx.agents.get_gym_env_config(env)\n    agent = PGAgent(\n        env_config,\n        policy_kwargs={'action_space': env_config.action_space},\n        hparams=config.pg_agent_hparams)\n\n    sess = tf.Session()\n    agent.sess = sess\n    sess.run(tf.global_variables_initializer())\n    sess.run(tf.local_variables_initializer())\n    sess.run(tf.tables_initializer())\n    feed_dict = {tx.global_mode(): tf.estimator.ModeKeys.TRAIN}\n\n    for e in range(300):\n        reward_sum = 0.\n        observ = env.reset()\n        agent.reset()\n        while True:\n            action = agent.get_action(observ, feed_dict=feed_dict)\n\n            next_observ, reward, terminal, _ = env.step(action=action)\n            if terminal:\n                reward = 0.\n            
agent.observe(reward, terminal, feed_dict=feed_dict)\n            observ = next_observ\n\n            reward_sum += reward\n            if terminal:\n                break\n\n        if (e + 1) % 10 == 0:\n            print('episode {}: {}'.format(e + 1, reward_sum))\n\n    sess.close()\n\n\nif __name__ == '__main__':\n    tf.app.run(main=_main)\n"
  },
  {
    "path": "texar_repo/examples/sentence_classifier/README.md",
    "content": "# Sentence Sentiment Classifier #\n\nThis example builds sentence convolutional classifier, and trains on [SST data](https://nlp.stanford.edu/sentiment/index.html). The example config [config_kim.py](./config_kim.py) corresponds to the paper \n[(Kim) Convolutional Neural Networks for Sentence Classification](https://arxiv.org/pdf/1408.5882.pdf). \n\nThe example shows:\n  * Contruction of simple model, involving the `Embedder` and `Conv1DClassifier`.\n  * Use of Texar `MultiAlignedData` to read parallel text and label data. \n\n## Usage ##\n\nUse the following cmd to download and prepare the SST binary data:\n\n```\npython sst_data_preprocessor.py [--data_path ./data]\n```\n\nHere\n  * `--data_path` specifies the directory to store the SST data. If the data files do not exist, the program will automatically download, extract, and pre-process the data.\n\nThe following cmd trains the model with Kim's config:\n\n```\npython clas_main.py --config config_kim\n```\n\nHere:\n  * `--config` specifies the config file to use. E.g., the above use the configuration defined in [config_kim.py](./config_kim.py)\n\nThe model will begin training and evaluating on the validation data, and will evaluate on the test data after every epoch if a valid accuracy is obtained. \n\n## Results ##\n\nThe model achieves around `83%` test set accuracy.\n"
  },
  {
    "path": "texar_repo/examples/sentence_classifier/clas_main.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Example for building a sentence convolutional classifier.\n\nUse `./sst_data_preprocessor.py` to download and clean the SST binary data.\n\nTo run:\n\n$ python clas_main.py --config=config_kim\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport importlib\nimport tensorflow as tf\nimport texar as tx\n\n# pylint: disable=invalid-name, too-many-locals\n\nflags = tf.flags\n\nflags.DEFINE_string(\"config\", \"config_kim\", \"The config to use.\")\n\nFLAGS = flags.FLAGS\n\nconfig = importlib.import_module(FLAGS.config)\n\ndef _main(_):\n    # Data\n    train_data = tx.data.MultiAlignedData(config.train_data)\n    val_data = tx.data.MultiAlignedData(config.val_data)\n    test_data = tx.data.MultiAlignedData(config.test_data)\n    iterator = tx.data.TrainTestDataIterator(train_data, val_data, test_data)\n    batch = iterator.get_next()\n\n    # Model architecture\n    embedder = tx.modules.WordEmbedder(\n        vocab_size=train_data.vocab('x').size, hparams=config.emb)\n    classifier = tx.modules.Conv1DClassifier(config.clas)\n    logits, pred = classifier(embedder(batch['x_text_ids']))\n\n    # Losses & train ops\n    loss = tf.losses.sparse_softmax_cross_entropy(\n        labels=batch['y'], logits=logits)\n    accu = tx.evals.accuracy(batch['y'], pred)\n\n    
train_op = tx.core.get_train_op(loss, hparams=config.opt)\n\n    def _run_epoch(sess, mode, epoch=0, verbose=False):\n        is_train = tx.utils.is_train_mode_py(mode)\n\n        fetches = {\n            \"accu\": accu,\n            \"batch_size\": tx.utils.get_batch_size(batch['y'])\n        }\n        if is_train:\n            fetches[\"train_op\"] = train_op\n        feed_dict = {tx.context.global_mode(): mode}\n\n        cum_accu = 0.\n        nsamples = 0\n        step = 0\n        while True:\n            try:\n                rets = sess.run(fetches, feed_dict)\n                step += 1\n\n                accu_ = rets['accu']\n                cum_accu += accu_ * rets['batch_size']\n                nsamples += rets['batch_size']\n\n                if verbose and (step == 1 or step % 100 == 0):\n                    tf.logging.info(\n                        \"epoch: {0:2} step: {1:4} accu: {2:.4f}\"\n                        .format(epoch, step, accu_))\n            except tf.errors.OutOfRangeError:\n                break\n        return cum_accu / nsamples\n\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        best_val_accu = -1.\n        for epoch in range(config.num_epochs):\n            # Train\n            iterator.switch_to_train_data(sess)\n            train_accu = _run_epoch(sess, tf.estimator.ModeKeys.TRAIN, epoch)\n            # Val\n            iterator.switch_to_val_data(sess)\n            val_accu = _run_epoch(sess, tf.estimator.ModeKeys.EVAL, epoch)\n            tf.logging.info('epoch: {0:2} train accu: {1:.4f} val accu: {2:.4f}'\n                            .format(epoch+1, train_accu, val_accu))\n            # Test\n            if val_accu > best_val_accu:\n                best_val_accu = val_accu\n\n                iterator.switch_to_test_data(sess)\n                test_accu = _run_epoch(sess, 
tf.estimator.ModeKeys.EVAL)\n                tf.logging.info('test accu: {0:.4f}'.format(test_accu))\n\nif __name__ == '__main__':\n    tf.logging.set_verbosity(tf.logging.INFO)\n    tf.app.run(main=_main)\n"
  },
  {
    "path": "texar_repo/examples/sentence_classifier/config_kim.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Sentence convolutional classifier config.\n\nThis is (approximately) the config of the paper:\n(Kim) Convolutional Neural Networks for Sentence Classification\n  https://arxiv.org/pdf/1408.5882.pdf\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\nimport copy\n\nnum_epochs = 15\n\ntrain_data = {\n    \"batch_size\": 50,\n    \"datasets\": [\n        {\n            \"files\": \"./data/sst2.train.sentences.txt\",\n            \"vocab_file\": \"./data/sst2.vocab\",\n            # Discards samples with length > 56\n            \"max_seq_length\": 56,\n            \"length_filter_mode\": \"discard\",\n            # Do not append BOS/EOS tokens to the sentences\n            \"bos_token\": \"\",\n            \"eos_token\": \"\",\n            \"data_name\": \"x\"\n        },\n        {\n            \"files\": \"./data/sst2.train.labels.txt\",\n            \"data_type\": \"int\",\n            \"data_name\": \"y\"\n        }\n    ]\n}\n# The val and test data have the same config with the train data, except\n# for the file names\nval_data = copy.deepcopy(train_data)\nval_data[\"datasets\"][0][\"files\"] = \"./data/sst2.dev.sentences.txt\"\nval_data[\"datasets\"][1][\"files\"] = \"./data/sst2.dev.labels.txt\"\ntest_data = copy.deepcopy(train_data)\ntest_data[\"datasets\"][0][\"files\"] = 
\"./data/sst2.test.sentences.txt\"\ntest_data[\"datasets\"][1][\"files\"] = \"./data/sst2.test.labels.txt\"\n\n# Word embedding\nemb = {\n    \"dim\": 300\n}\n\n# Classifier\nclas = {\n    \"num_conv_layers\": 1,\n    \"filters\": 100,\n    \"kernel_size\": [3, 4, 5],\n    \"conv_activation\": \"relu\",\n    \"pooling\": \"MaxPooling1D\",\n    \"num_dense_layers\": 0,\n    \"dropout_conv\": [1],\n    \"dropout_rate\": 0.5,\n    \"num_classes\": 2\n}\n\n# Optimization\n# Just use the default config, e.g., Adam Optimizer\nopt = {}\n"
  },
  {
    "path": "texar_repo/examples/sentence_classifier/sst_data_preprocessor.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Preparing the SST2 dataset.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport re\nfrom io import open # pylint: disable=redefined-builtin\nimport tensorflow as tf\nimport texar as tx\n\n# pylint: disable=invalid-name, too-many-locals\n\nflags = tf.flags\n\nflags.DEFINE_string(\"data_path\", \"./data\",\n                    \"Directory containing SST data. \"\n                    \"E.g., ./data/sst2.train.sentences.txt. 
If not exists, \"\n                    \"the directory will be created and SST raw data will \"\n                    \"be downloaded.\")\n\nFLAGS = flags.FLAGS\n\n\ndef clean_sst_text(text):\n    \"\"\"Cleans tokens in the SST data, which has already been tokenized.\n    \"\"\"\n    text = re.sub(r\"[^A-Za-z0-9(),!?\\'\\`]\", \" \", text)\n    text = re.sub(r\"\\s{2,}\", \" \", text)\n    return text.strip().lower()\n\ndef transform_raw_sst(data_path, raw_fn, new_fn):\n    \"\"\"Transforms the raw data format to a new format.\n    \"\"\"\n    fout_x_name = os.path.join(data_path, new_fn + '.sentences.txt')\n    fout_x = open(fout_x_name, 'w', encoding='utf-8')\n    fout_y_name = os.path.join(data_path, new_fn + '.labels.txt')\n    fout_y = open(fout_y_name, 'w', encoding='utf-8')\n\n    fin_name = os.path.join(data_path, raw_fn)\n    with open(fin_name, 'r', encoding='utf-8') as fin:\n        for line in fin:\n            parts = line.strip().split()\n            label = parts[0]\n            sent = ' '.join(parts[1:])\n            sent = clean_sst_text(sent)\n            fout_x.write(sent + '\\n')\n            fout_y.write(label + '\\n')\n\n    return fout_x_name, fout_y_name\n\ndef prepare_data(data_path):\n    \"\"\"Preprocesses SST2 data.\n    \"\"\"\n    train_path = os.path.join(data_path, \"sst.train.sentences.txt\")\n    if not tf.gfile.Exists(train_path):\n        url = ('https://raw.githubusercontent.com/ZhitingHu/'\n               'logicnn/master/data/raw/')\n        files = ['stsa.binary.phrases.train', 'stsa.binary.dev',\n                 'stsa.binary.test']\n        for fn in files:\n            tx.data.maybe_download(url + fn, data_path, extract=True)\n\n    fn_train, _ = transform_raw_sst(\n        data_path, 'stsa.binary.phrases.train', 'sst2.train')\n    transform_raw_sst(data_path, 'stsa.binary.dev', 'sst2.dev')\n    transform_raw_sst(data_path, 'stsa.binary.test', 'sst2.test')\n\n    vocab = tx.data.make_vocab(fn_train)\n    fn_vocab = 
os.path.join(data_path, 'sst2.vocab')\n    with open(fn_vocab, 'w', encoding='utf-8') as f_vocab:\n        for v in vocab:\n            f_vocab.write(v + '\\n')\n\n    tf.logging.info('Preprocessing done: {}'.format(data_path))\n\ndef _main(_):\n    prepare_data(FLAGS.data_path)\n\nif __name__ == '__main__':\n    tf.logging.set_verbosity(tf.logging.INFO)\n    tf.app.run(main=_main)\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_attn/README.md",
    "content": "# Seq2seq Model #\n\nThis example builds an attentional seq2seq model for machine translation.\n\n## Usage ##\n\n### Dataset ###\n\nTwo example datasets are provided:\n\n  * toy_copy: A small toy autoencoding dataset from [TF Seq2seq toolkit](https://github.com/google/seq2seq/tree/2500c26add91b079ca00cf1f091db5a99ddab9ae).\n  * iwslt14: The benchmark [IWSLT2014](https://sites.google.com/site/iwsltevaluation2014/home) (de-en) machine translation dataset, following [(Ranzato et al., 2015)](https://arxiv.org/pdf/1511.06732.pdf) for data pre-processing.\n\nDownload the data with the following cmds:\n\n```\npython prepare_data.py --data toy_copy\npython prepare_data.py --data iwslt14\n```\n\n### Train the model ###\n\nTrain the model with the following cmd:\n\n```\npython seq2seq_attn.py --config_model config_model --config_data config_toy_copy\n```\n\nHere:\n  * `--config_model` specifies the model config. Note not to include the `.py` suffix.\n  * `--config_data` specifies the data config.\n\n[config_model.py](./config_model.py) specifies a single-layer seq2seq model with Luong attention and bi-directional RNN encoder. Hyperparameters taking default values can be omitted from the config file. \n\nFor demonstration purpose, [config_model_full.py](./config_model_full.py) gives all possible hyperparameters for the model. The two config files will lead to the same model.\n\n## Results ##\n\nOn the IWSLT14 dataset, using original target texts as reference(no  `<UNK>`  in the reference), the model achieves `BLEU = 26.44 ± 0.18` .\n\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_attn/config_iwslt14.py",
    "content": "\nnum_epochs = 15\ndisplay = 500\n\nsource_vocab_file = './data/iwslt14/vocab.de'\ntarget_vocab_file = './data/iwslt14/vocab.en'\n\ntrain = {\n    'batch_size': 32,\n    'allow_smaller_final_batch': False,\n    'source_dataset': {\n        \"files\": 'data/iwslt14/train.de',\n        'vocab_file': source_vocab_file,\n        'max_seq_length': 50\n    },\n    'target_dataset': {\n        'files': 'data/iwslt14/train.en',\n        'vocab_file': target_vocab_file,\n        'max_seq_length': 50\n    }\n}\nval = {\n    'batch_size': 32,\n    'shuffle': False,\n    'source_dataset': {\n        \"files\": 'data/iwslt14/valid.de',\n        'vocab_file': source_vocab_file,\n    },\n    'target_dataset': {\n        'files': 'data/iwslt14/valid.en',\n        'vocab_file': target_vocab_file,\n    }\n}\ntest = {\n    'batch_size': 32,\n    'shuffle': False,\n    'source_dataset': {\n        \"files\": 'data/iwslt14/test.de',\n        'vocab_file': source_vocab_file,\n    },\n    'target_dataset': {\n        'files': 'data/iwslt14/test.en',\n        'vocab_file': target_vocab_file,\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_attn/config_model.py",
    "content": "# Attentional Seq2seq model.\n# Hyperparameters not specified here will take the default values.\n\nnum_units = 256\nbeam_width = 10\n\nembedder = {\n    'dim': num_units\n}\nencoder = {\n    'rnn_cell_fw': {\n        'kwargs': {\n            'num_units': num_units\n        }\n    }\n}\ndecoder = {\n    'rnn_cell': {\n        'kwargs': {\n            'num_units': num_units\n        },\n    },\n    'attention': {\n        'kwargs': {\n            'num_units': num_units,\n        },\n        'attention_layer_size': num_units\n    }\n}\nopt = {\n    'optimizer': {\n        'type':  'AdamOptimizer',\n        'kwargs': {\n            'learning_rate': 0.001,\n        },\n    },\n}\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_attn/config_model_full.py",
    "content": "# The full possible hyperparameters for the attentional seq2seq model.\n# Most of the hyperparameters take the default values and are not necessary to\n# specify explicitly. The config here results in the same model with the\n# `config_model.py`.\n\nnum_units = 256\nbeam_width = 10\n\n# --------------------- Embedder --------------------- #\nembedder = {\n    'dim': num_units,\n    'initializer': {\n        'type': 'random_uniform_initializer',\n        'kwargs': {\n            'minval': -0.1,\n            'maxval': 0.1,\n            'seed': None\n        },\n    },\n    'regularizer': {\n        'type': 'L1L2',\n        'kwargs': {\n            'l1': 0,\n            'l2': 0\n        }\n    },\n    'dropout_rate': 0,\n    'dropout_strategy': 'element',\n    'trainable': True,\n    'name': 'word_embedder'\n}\n\n# --------------------- Encoder --------------------- #\nencoder = {\n    'rnn_cell_fw': {\n        'type': 'LSTMCell',\n        'kwargs': {\n            'num_units': num_units,\n            'forget_bias': 1.0,\n            'activation': None,\n            # Other arguments go here for tf.nn.rnn_cell.LSTMCell\n            # ...\n        },\n        'num_layers': 1,\n        'dropout': {\n            'input_keep_prob': 1.0,\n            'output_keep_prob': 1.0,\n            'state_keep_prob': 1.0,\n            'variational_recurrent': False,\n            'input_size': [],\n        },\n        'residual': False,\n        'highway': False,\n    },\n    'rnn_cell_bw': {\n        # The same possible hyperparameters as with 'rnn_cell_fw'\n        # ...\n    },\n    'rnn_cell_share_config': True,\n    'output_layer_fw': {\n        'num_layers': 0,\n        'layer_size': 128,\n        'activation': 'identity',\n        'final_layer_activation': None,\n        'other_dense_kwargs': None,\n        'dropout_layer_ids': [],\n        'dropout_rate': 0.5,\n        'variational_dropout': False\n    },\n    'output_layer_bw': {\n        # The same possible 
hyperparameters as with 'output_layer_fw'\n        # ...\n    },\n    'output_layer_share_config': True,\n    'name': 'bidirectional_rnn_encoder'\n}\n\n# --------------------- Decoder --------------------- #\ndecoder = {\n    'rnn_cell': {\n        'type': 'LSTMCell',\n        'kwargs': {\n            'num_units': num_units,\n            'forget_bias': 1.0,\n            'activation': None,\n            # Other arguments go here for tf.nn.rnn_cell.LSTMCell\n            # ...\n        },\n        'num_layers': 1,\n        'dropout': {\n            'input_keep_prob': 1.0,\n            'output_keep_prob': 1.0,\n            'state_keep_prob': 1.0,\n            'variational_recurrent': False,\n            'input_size': [],\n        },\n        'residual': False,\n        'highway': False,\n    },\n    'attention': {\n        'type': 'LuongAttention',\n        'kwargs': {\n            'num_units': num_units,\n            'scale': False,\n            'probability_fn': None,\n            'score_mask_value': None,\n            # Other arguments go here for tf.contrib.seq2seq.LuongAttention\n            # ...\n        },\n        'attention_layer_size': num_units,\n        'alignment_history': False,\n        'output_attention': True,\n    },\n    'helper_train': {\n        'type': 'TrainingHelper',\n        'kwargs': {\n            # Arguments go here for tf.contrib.seq2seq.TrainingHelper\n        }\n    },\n    'helper_infer': {\n        # The same possible hyperparameters as with 'helper_train'\n        # ...\n    },\n    'max_decoding_length_train': None,\n    'max_decoding_length_infer': None,\n    'name': 'attention_rnn_decoder'\n}\n# --------------------- Optimization --------------------- #\nopt = {\n    'optimizer': {\n        'type':  'AdamOptimizer',\n        'kwargs': {\n            'learning_rate': 0.001,\n            # Other keyword arguments for the optimizer class\n        },\n    },\n    'learning_rate_decay': {\n        # Hyperparameters of learning rate 
decay\n    },\n    'gradient_clip': {\n        # Hyperparameters of gradient clipping\n    },\n    'gradient_noise_scale': None,\n    'name': None\n}\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_attn/config_toy_copy.py",
    "content": "\nnum_epochs = 4\ndisplay = 50\n\nsource_vocab_file = './data/toy_copy/train/vocab.sources.txt'\ntarget_vocab_file = './data/toy_copy/train/vocab.targets.txt'\n\ntrain = {\n    'batch_size': 32,\n    'source_dataset': {\n        \"files\": './data/toy_copy/train/sources.txt',\n        'vocab_file': source_vocab_file\n    },\n    'target_dataset': {\n        'files': './data/toy_copy/train/targets.txt',\n        'vocab_file': target_vocab_file\n    }\n}\nval = {\n    'batch_size': 32,\n    'source_dataset': {\n        \"files\": './data/toy_copy/dev/sources.txt',\n        'vocab_file': source_vocab_file\n    },\n    'target_dataset': {\n        \"files\": './data/toy_copy/dev/targets.txt',\n        'vocab_file': target_vocab_file\n    }\n}\ntest = {\n    'batch_size': 32,\n    'source_dataset': {\n        \"files\": './data/toy_copy/test/sources.txt',\n        'vocab_file': source_vocab_file\n    },\n    'target_dataset': {\n        \"files\": './data/toy_copy/test/targets.txt',\n        'vocab_file': target_vocab_file\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_attn/prepare_data.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Downloads data.\n\"\"\"\nimport tensorflow as tf\nimport texar as tx\n\n# pylint: disable=invalid-name\n\nflags = tf.flags\n\nflags.DEFINE_string(\"data\", \"iwslt14\", \"Data to download [iwslt14|toy_copy]\")\n\nFLAGS = flags.FLAGS\n\ndef prepare_data():\n    \"\"\"Downloads data.\n    \"\"\"\n    if FLAGS.data == 'iwslt14':\n        tx.data.maybe_download(\n            urls='https://drive.google.com/file/d/'\n                 '1y4mUWXRS2KstgHopCS9koZ42ENOh6Yb9/view?usp=sharing',\n            path='./',\n            filenames='iwslt14.zip',\n            extract=True)\n    elif FLAGS.data == 'toy_copy':\n        tx.data.maybe_download(\n            urls='https://drive.google.com/file/d/'\n                 '1fENE2rakm8vJ8d3voWBgW4hGlS6-KORW/view?usp=sharing',\n            path='./',\n            filenames='toy_copy.zip',\n            extract=True)\n    else:\n        raise ValueError('Unknown data: {}'.format(FLAGS.data))\n\ndef main():\n    \"\"\"Entrypoint.\n    \"\"\"\n    prepare_data()\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_attn/seq2seq_attn.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Attentional Seq2seq.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\n#pylint: disable=invalid-name, too-many-arguments, too-many-locals\n\nimport importlib\nimport tensorflow as tf\nimport texar as tx\n\nflags = tf.flags\n\nflags.DEFINE_string(\"config_model\", \"config_model\", \"The model config.\")\nflags.DEFINE_string(\"config_data\", \"config_iwslt14\", \"The dataset config.\")\n\nFLAGS = flags.FLAGS\n\nconfig_model = importlib.import_module(FLAGS.config_model)\nconfig_data = importlib.import_module(FLAGS.config_data)\n\n\ndef build_model(batch, train_data):\n    \"\"\"Assembles the seq2seq model.\n    \"\"\"\n    source_embedder = tx.modules.WordEmbedder(\n        vocab_size=train_data.source_vocab.size, hparams=config_model.embedder)\n\n    encoder = tx.modules.BidirectionalRNNEncoder(\n        hparams=config_model.encoder)\n\n    enc_outputs, _ = encoder(source_embedder(batch['source_text_ids']))\n\n    target_embedder = tx.modules.WordEmbedder(\n        vocab_size=train_data.target_vocab.size, hparams=config_model.embedder)\n\n    decoder = tx.modules.AttentionRNNDecoder(\n        memory=tf.concat(enc_outputs, axis=2),\n        memory_sequence_length=batch['source_length'],\n        vocab_size=train_data.target_vocab.size,\n        
hparams=config_model.decoder)\n\n    training_outputs, _, _ = decoder(\n        decoding_strategy='train_greedy',\n        inputs=target_embedder(batch['target_text_ids'][:, :-1]),\n        sequence_length=batch['target_length'] - 1)\n\n    mle_loss = tx.losses.sequence_sparse_softmax_cross_entropy(\n        labels=batch['target_text_ids'][:, 1:],\n        logits=training_outputs.logits,\n        sequence_length=batch['target_length'] - 1)\n\n    train_op = tx.core.get_train_op(mle_loss, hparams=config_model.opt)\n\n    start_tokens = tf.ones_like(batch['target_length']) * \\\n            train_data.target_vocab.bos_token_id\n    beam_search_outputs, _, _ = \\\n        tx.modules.beam_search_decode(\n            decoder_or_cell=decoder,\n            embedding=target_embedder,\n            start_tokens=start_tokens,\n            end_token=train_data.target_vocab.eos_token_id,\n            beam_width=config_model.beam_width,\n            max_decoding_length=60)\n\n    return train_op, beam_search_outputs\n\n\ndef main():\n    \"\"\"Entrypoint.\n    \"\"\"\n    train_data = tx.data.PairedTextData(hparams=config_data.train)\n    val_data = tx.data.PairedTextData(hparams=config_data.val)\n    test_data = tx.data.PairedTextData(hparams=config_data.test)\n    data_iterator = tx.data.TrainTestDataIterator(\n        train=train_data, val=val_data, test=test_data)\n\n    batch = data_iterator.get_next()\n\n    train_op, infer_outputs = build_model(batch, train_data)\n\n    def _train_epoch(sess):\n        data_iterator.switch_to_train_data(sess)\n\n        step = 0\n        while True:\n            try:\n                loss = sess.run(train_op)\n                if step % config_data.display == 0:\n                    print(\"step={}, loss={:.4f}\".format(step, loss))\n                step += 1\n            except tf.errors.OutOfRangeError:\n                break\n\n    def _eval_epoch(sess, mode):\n        if mode == 'val':\n            
data_iterator.switch_to_val_data(sess)\n        else:\n            data_iterator.switch_to_test_data(sess)\n\n        refs, hypos = [], []\n        while True:\n            try:\n                fetches = [\n                    batch['target_text'][:, 1:],\n                    infer_outputs.predicted_ids[:, :, 0]\n                ]\n                feed_dict = {\n                    tx.global_mode(): tf.estimator.ModeKeys.EVAL\n                }\n                target_texts_ori, output_ids = \\\n                    sess.run(fetches, feed_dict=feed_dict)\n\n                target_texts = tx.utils.strip_special_tokens(\n                    target_texts_ori, is_token_list=True)\n                output_texts = tx.utils.map_ids_to_strs(\n                    ids=output_ids, vocab=val_data.target_vocab)\n\n                for hypo, ref in zip(output_texts, target_texts):\n                    hypos.append(hypo)\n                    refs.append([ref])\n            except tf.errors.OutOfRangeError:\n                break\n\n        return tx.evals.corpus_bleu_moses(list_of_references=refs,\n                                          hypotheses=hypos)\n\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        best_val_bleu = -1.\n        for i in range(config_data.num_epochs):\n            _train_epoch(sess)\n\n            val_bleu = _eval_epoch(sess, 'val')\n            best_val_bleu = max(best_val_bleu, val_bleu)\n            print('val epoch={}, BLEU={:.4f}; best-ever={:.4f}'.format(\n                i, val_bleu, best_val_bleu))\n\n            test_bleu = _eval_epoch(sess, 'test')\n            print('test epoch={}, BLEU={:.4f}'.format(i, test_bleu))\n\n            print('=' * 50)\n\n\nif __name__ == '__main__':\n    main()\n\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_configs/README.md",
    "content": "# Seq2seq Model #\n\nThis example builds a (plain) seq2seq model with Texar's model template and Tensorflow estimator. \n\n## Usage ##\n\n### Dataset ###\n\nDownload the example dataset:\n\n  * toy_copy: A small toy autoencoding dataset from [TF Seq2seq toolkit](https://github.com/google/seq2seq/tree/2500c26add91b079ca00cf1f091db5a99ddab9ae).\n\n```\npython [PATH_TEXAR]/examples/seq2seq_attn/prepare_data.py --data toy_copy\n```\n\n### Train the model ###\n\nTrain the model with the following cmd:\n\n```\npython [PATH_TEXAR]/bin/train.py --config_paths config_model_small.yml,config_data_toy_copy.yml \n```\n\nSee [train.py](../../bin/train.py) for other available configurations.\n\n[config_model_small.yml](./config_model_small.yml) speicifies a small-size model with single-layer RNN encoder/decoder. [config_model_medium.yml](./config_model_medium.yml) specifies a medium-size one with 2-layer RNN encoder/decoder.\n\nThe model will be trained/evaluated/checkpointed within the [Tensorflow Estimator](https://www.tensorflow.org/guide/estimators).\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_configs/config_data_toy_copy.yml",
    "content": "# NMT data config. See `texar.data.PairedTextData.default_hparams()` for \n# hyperparameters of train/eval data. Hyperparameters not specified here will \n# take the default values. \ndata_hparams_train:\n  num_epochs: 10\n  batch_size: 32\n  source_dataset:\n    files: ./data/toy_copy/train/sources.txt\n    vocab_file: ./data/toy_copy/train/vocab.sources.txt\n    max_seq_length: 30\n  target_dataset:\n    files: ./data/toy_copy/train/targets.txt\n    vocab_file: ./data/toy_copy/train/vocab.targets.txt\n    max_seq_length: 30\n\ndata_hparams_eval:\n  batch_size: 32\n  shuffle: False\n  source_dataset:\n    files: ./data/toy_copy/dev/sources.txt\n    vocab_file: ./data/toy_copy/train/vocab.sources.txt\n    max_seq_length: 50\n  target_dataset:\n    files: ./data/toy_copy/dev/targets.txt\n    vocab_file: ./data/toy_copy/train/vocab.targets.txt\n    max_seq_length: 50\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_configs/config_model_medium.yml",
    "content": "# Basic Seq2seq model of medium size. See \n# `texar.models.BasicSeq2seq.default_hparams()` for possible hyperparameters \n# default values. Hyperparameters not specified here will take the default \n# values.\nmodel: BasicSeq2seq\nmodel_hparams:\n  source_embedder_hparams:\n    dim: 256\n  encoder_hparams:\n    rnn_cell:\n      type: GRUCell\n      kwargs:\n        num_units: 256 \n        num_layers: 2\n      dropout:\n        input_keep_prob: 0.8\n  decoder_hparams:\n    rnn_cell:\n      type: GRUCell\n      kwargs:\n        num_units: 256\n        num_layers: 2\n      dropout:\n        input_keep_prob: 0.8\n  optimization:\n    optimizer:\n      type: AdamOptimizer\n      kwargs:\n        learning_rate: 0.0001\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_configs/config_model_small.yml",
    "content": "# Basic Seq2seq model of small size. See \n# `texar.models.BasicSeq2seq.default_hparams()` for possible hyperparameters \n# default values. Hyperparameters not specified here will take the default \n# values.\nmodel: BasicSeq2seq\nmodel_hparams:\n  source_embedder_hparams:\n    dim: 128\n  encoder_hparams:\n    rnn_cell:\n      type: GRUCell\n      kwargs:\n        num_units: 128 \n      dropout:\n        input_keep_prob: 0.8\n  decoder_hparams:\n    rnn_cell:\n      type: GRUCell\n      kwargs:\n        num_units: 128\n      dropout:\n        input_keep_prob: 0.8\n  optimization:\n    optimizer:\n      type: AdamOptimizer\n      kwargs:\n        learning_rate: 0.0001\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/README.md",
    "content": "# Sequence Generation Algorithms Tackling Exposure Bias #\n\nDespite the computational simplicity and efficiency, maximum likelihood training of sequence generation models (e.g., RNNs) suffers from the exposure bias [(Ranzato et al., 2015)](https://arxiv.org/pdf/1511.06732.pdf). That is, the model is trained to predict the next token given the previous ground-truth tokens; while at test time, since the resulting model does not have access to the ground truth, tokens generated by the model itself are instead used to make the next prediction. This discrepancy between training and test leads to the issue that mistakes in prediction can quickly accumulate.\n\nThis example provide implementations of some classic and advanced training algorithms that tackles the exposure bias. The base model is an attentional seq2seq.\n\n* **Maximum Likelihood (MLE)**: attentional seq2seq model with maximum likelihood training.\n* **Reward Augmented Maximum Likelihood (RAML)**: Described in [(Norouzi et al., 2016)](https://arxiv.org/pdf/1609.00150.pdf) and we use the sampling approach (n-gram replacement) by [(Ma et al., 2017)](https://arxiv.org/abs/1705.07136).\n* **Scheduled Sampling**: Described in [(Bengio et al., 2015)](https://arxiv.org/abs/1506.03099)\n* **Interpolation Algorithm**: Described in [(Tan et al., 2018) Connecting the Dots Between MLE and RL for Sequence Generation](https://arxiv.org/abs/1811.09740)\n\n## Usage ##\n\n### Dataset ###\n\nTwo example datasets are provided:\n\n  * iwslt14: The benchmark [IWSLT2014](https://sites.google.com/site/iwsltevaluation2014/home) (de-en) machine translation dataset, following [(Ranzato et al., 2015)](https://arxiv.org/pdf/1511.06732.pdf) for data pre-processing.\n  * gigaword: The benchmark [GIGAWORD](https://catalog.ldc.upenn.edu/LDC2003T05) text summarization dataset. 
we sampled 200K out of the 3.8M pre-processed training examples provided by [(Rush et al., 2015)](https://www.aclweb.org/anthology/D/D15/D15-1044.pdf) for the sake of training efficiency. We used the refined validation and test sets provided by [(Zhou et al., 2017)](https://arxiv.org/pdf/1704.07073.pdf).\n\nDownload the data with the following commands:\n\n```\npython utils/prepare_data.py --data iwslt14\npython utils/prepare_data.py --data giga\n```\n\n### Train the models ###\n\n#### Baseline Attentional Seq2seq\n\n```\npython baseline_seq2seq_attn_main.py \\\n    --config_model configs.config_model \\\n    --config_data configs.config_iwslt14\n```\n\nHere:\n  * `--config_model` specifies the model config. Note not to include the `.py` suffix.\n  * `--config_data` specifies the data config.\n\n[configs.config_model.py](./configs/config_model.py) specifies a single-layer seq2seq model with Luong attention and bi-directional RNN encoder. Hyperparameters taking default values can be omitted from the config file. \n\nFor demonstration purpose, [configs.config_model_full.py](./configs/config_model_full.py) gives all possible hyperparameters for the model. The two config files will lead to the same model.\n\n#### Reward Augmented Maximum Likelihood (RAML)\n```\npython raml_main.py \\\n    --config_model configs.config_model \\\n    --config_data configs.config_iwslt14 \\\n    --raml_file data/iwslt14/samples_iwslt14.txt \\\n    --n_samples 10\n```\nHere:\n  * `--raml_file` specifies the file containing the augmented samples and rewards.\n  * `--n_samples` specifies number of augmented samples for every target sentence.\n  * `--tau` specifies the temperature of the exponentiated payoff distribution in RAML.\n\nIn the downloaded datasets, we have provided example files for `--raml_file`, which include augmented samples for ```iwslt14``` and ```gigaword``` respectively. We also provide scripts for generating augmented samples by yourself. 
Please refer to [utils/raml_samples_generation](utils/raml_samples_generation).\n\n\n#### Scheduled Sampling\n```\npython scheduled_sampling_main.py \\\n    --config_model configs.config_model \\\n    --config_data configs.config_iwslt14 \\\n    --decay_factor 500.\n```\nHere:\n  * `--decay_factor` specifies the hyperparameter controlling the speed of increasing the probability of sampling from model.\n\n\n#### Interpolation Algorithm\n```\npython interpolation_main.py \\\n    --config_model configs.config_model \\\n    --config_data configs.config_iwslt14 \\\n    --lambdas_init [0.04,0.96,0.0] \\\n    --delta_lambda_self 0.06 \\\n    --delta_lambda_reward 0.06 \\\n    --lambda_reward_steps 4\n```\nHere:\n\n  * `--lambdas_init` specifies the initial value of lambdas.\n  * `--delta_lambda_reward` specifies the increment of lambda_reward every annealing step.\n  * `--delta_lambda_self` specifies the decrement of lambda_self every annealing step.\n  * `--k` specifies the times of increasing lambda_reward after increasing lambda_self once.\n\n## Results ##\n\n### Machine Translation\n| Model      | BLEU Score   |\n| -----------| -------|\n| MLE        | 26.44 ± 0.18  |\n| Scheduled Sampling   | 26.76  ± 0.17  |\n| RAML | 27.22  ± 0.14  |\n| Interpolation | 27.82  ± 0.11  |\n\n### Text Summarization\n| Model      | Rouge-1   | Rouge-2 | Rouge-L |\n| -----------| -------|-------|-------|\n| MLE        | 36.11 ± 0.21  | 16.39 ± 0.16 | 32.32 ± 0.19 |\n| Scheduled Sampling   |  36.59 ± 0.12  |16.79 ± 0.22|32.77 ± 0.17|\n| RAML | 36.30  ± 0.24 | 16.69 ± 0.20 | 32.49 ± 0.17 |\n| Interpolation | 36.72  ± 0.29  |16.99 ± 0.17 | 32.95 ± 0.33|\n\n \n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/baseline_seq2seq_attn_main.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nAttentional Seq2seq.\nsame as examples/seq2seq_attn except that here Rouge is also supported.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\nfrom __future__ import unicode_literals\n\n# pylint: disable=invalid-name, too-many-arguments, too-many-locals\n\nfrom io import open\nimport importlib\nimport tensorflow as tf\nimport texar as tx\nfrom rouge import Rouge\n\nflags = tf.flags\n\nflags.DEFINE_string(\"config_model\", \"configs.config_model\", \"The model config.\")\nflags.DEFINE_string(\"config_data\", \"configs.config_iwslt14\",\n                    \"The dataset config.\")\n\nflags.DEFINE_string('output_dir', '.', 'where to keep training logs')\n\nFLAGS = flags.FLAGS\n\nconfig_model = importlib.import_module(FLAGS.config_model)\nconfig_data = importlib.import_module(FLAGS.config_data)\n\nif not FLAGS.output_dir.endswith('/'):\n    FLAGS.output_dir += '/'\nlog_dir = FLAGS.output_dir + 'training_log_baseline/'\ntx.utils.maybe_create_dir(log_dir)\n\n\ndef build_model(batch, train_data):\n    \"\"\"Assembles the seq2seq model.\n    \"\"\"\n    source_embedder = tx.modules.WordEmbedder(\n        vocab_size=train_data.source_vocab.size, hparams=config_model.embedder)\n\n    encoder = tx.modules.BidirectionalRNNEncoder(\n        hparams=config_model.encoder)\n\n 
   enc_outputs, _ = encoder(source_embedder(batch['source_text_ids']))\n\n    target_embedder = tx.modules.WordEmbedder(\n        vocab_size=train_data.target_vocab.size, hparams=config_model.embedder)\n\n    decoder = tx.modules.AttentionRNNDecoder(\n        memory=tf.concat(enc_outputs, axis=2),\n        memory_sequence_length=batch['source_length'],\n        vocab_size=train_data.target_vocab.size,\n        hparams=config_model.decoder)\n\n    training_outputs, _, _ = decoder(\n        decoding_strategy='train_greedy',\n        inputs=target_embedder(batch['target_text_ids'][:, :-1]),\n        sequence_length=batch['target_length'] - 1)\n\n    train_op = tx.core.get_train_op(\n        tx.losses.sequence_sparse_softmax_cross_entropy(\n            labels=batch['target_text_ids'][:, 1:],\n            logits=training_outputs.logits,\n            sequence_length=batch['target_length'] - 1),\n        hparams=config_model.opt)\n\n    start_tokens = tf.ones_like(batch['target_length']) *\\\n                   train_data.target_vocab.bos_token_id\n    beam_search_outputs, _, _ = \\\n        tx.modules.beam_search_decode(\n            decoder_or_cell=decoder,\n            embedding=target_embedder,\n            start_tokens=start_tokens,\n            end_token=train_data.target_vocab.eos_token_id,\n            beam_width=config_model.beam_width,\n            max_decoding_length=60)\n\n    return train_op, beam_search_outputs\n\n\ndef print_stdout_and_file(content, file):\n    print(content)\n    print(content, file=file)\n\n\ndef main():\n    \"\"\"Entrypoint.\n    \"\"\"\n    train_data = tx.data.PairedTextData(hparams=config_data.train)\n    val_data = tx.data.PairedTextData(hparams=config_data.val)\n    test_data = tx.data.PairedTextData(hparams=config_data.test)\n    data_iterator = tx.data.TrainTestDataIterator(\n        train=train_data, val=val_data, test=test_data)\n\n    batch = data_iterator.get_next()\n\n    train_op, infer_outputs = build_model(batch, 
train_data)\n\n    def _train_epoch(sess, epoch_no):\n        data_iterator.switch_to_train_data(sess)\n        training_log_file = \\\n            open(log_dir + 'training_log' + str(epoch_no) + '.txt', 'w',\n                 encoding='utf-8')\n\n        step = 0\n        while True:\n            try:\n                loss = sess.run(train_op)\n                print(\"step={}, loss={:.4f}\".format(step, loss),\n                      file=training_log_file)\n                if step % config_data.observe_steps == 0:\n                    print(\"step={}, loss={:.4f}\".format(step, loss))\n                training_log_file.flush()\n                step += 1\n            except tf.errors.OutOfRangeError:\n                break\n\n    def _eval_epoch(sess, mode, epoch_no):\n        if mode == 'val':\n            data_iterator.switch_to_val_data(sess)\n        else:\n            data_iterator.switch_to_test_data(sess)\n\n        refs, hypos = [], []\n        while True:\n            try:\n                fetches = [\n                    batch['target_text'][:, 1:],\n                    infer_outputs.predicted_ids[:, :, 0]\n                ]\n                feed_dict = {\n                    tx.global_mode(): tf.estimator.ModeKeys.EVAL\n                }\n                target_texts_ori, output_ids = \\\n                    sess.run(fetches, feed_dict=feed_dict)\n\n                target_texts = tx.utils.strip_special_tokens(\n                    target_texts_ori.tolist(), is_token_list=True)\n                target_texts = tx.utils.str_join(target_texts)\n                output_texts = tx.utils.map_ids_to_strs(\n                    ids=output_ids, vocab=val_data.target_vocab)\n\n                tx.utils.write_paired_text(\n                    target_texts, output_texts,\n                    log_dir + mode + '_results' + str(epoch_no) + '.txt',\n                    append=True, mode='h', sep=' ||| ')\n\n                for hypo, ref in zip(output_texts, target_texts):\n 
                   if config_data.eval_metric == 'bleu':\n                        hypos.append(hypo)\n                        refs.append([ref])\n                    elif config_data.eval_metric == 'rouge':\n                        hypos.append(tx.utils.compat_as_text(hypo))\n                        refs.append(tx.utils.compat_as_text(ref))\n            except tf.errors.OutOfRangeError:\n                break\n\n        if config_data.eval_metric == 'bleu':\n            return tx.evals.corpus_bleu_moses(\n                list_of_references=refs, hypotheses=hypos)\n        elif config_data.eval_metric == 'rouge':\n            rouge = Rouge()\n            return rouge.get_scores(hyps=hypos, refs=refs, avg=True)\n\n    def _calc_reward(score):\n        \"\"\"\n        Return the bleu score or the sum of (Rouge-1, Rouge-2, Rouge-L).\n        \"\"\"\n        if config_data.eval_metric == 'bleu':\n            return score\n        elif config_data.eval_metric == 'rouge':\n            return sum([value['f'] for key, value in score.items()])\n\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        best_val_score = -1.\n        scores_file = open(log_dir + 'scores.txt', 'w', encoding='utf-8')\n        for i in range(config_data.num_epochs):\n            _train_epoch(sess, i)\n\n            val_score = _eval_epoch(sess, 'val', i)\n            test_score = _eval_epoch(sess, 'test', i)\n\n            best_val_score = max(best_val_score, _calc_reward(val_score))\n\n            if config_data.eval_metric == 'bleu':\n                print_stdout_and_file(\n                    'val epoch={}, BLEU={:.4f}; best-ever={:.4f}'.format(\n                        i, val_score, best_val_score), file=scores_file)\n\n                print_stdout_and_file(\n                    'test epoch={}, BLEU={:.4f}'.format(i, test_score),\n                    
file=scores_file)\n                print_stdout_and_file('=' * 50, file=scores_file)\n\n            elif config_data.eval_metric == 'rouge':\n                print_stdout_and_file(\n                    'valid epoch {}:'.format(i), file=scores_file)\n                for key, value in val_score.items():\n                    print_stdout_and_file(\n                        '{}: {}'.format(key, value), file=scores_file)\n                print_stdout_and_file('fsum: {}; best_val_fsum: {}'.format(\n                    _calc_reward(val_score), best_val_score), file=scores_file)\n\n                print_stdout_and_file(\n                    'test epoch {}:'.format(i), file=scores_file)\n                for key, value in test_score.items():\n                    print_stdout_and_file(\n                        '{}: {}'.format(key, value), file=scores_file)\n                print_stdout_and_file('=' * 110, file=scores_file)\n\n            scores_file.flush()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/configs/__init__.py",
    "content": ""
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/configs/config_giga.py",
    "content": "num_epochs = 30\nobserve_steps = 500\n\neval_metric = 'rouge'\n\nbatch_size = 64\nsource_vocab_file = './data/giga/vocab.article'\ntarget_vocab_file = './data/giga/vocab.title'\n\ntrain = {\n    'batch_size': batch_size,\n    'allow_smaller_final_batch': False,\n    'source_dataset': {\n        \"files\": 'data/giga/train.article',\n        'vocab_file': source_vocab_file\n    },\n    'target_dataset': {\n        'files': 'data/giga/train.title',\n        'vocab_file': target_vocab_file\n    }\n}\nval = {\n    'batch_size': batch_size,\n    'shuffle': False,\n    'allow_smaller_final_batch': True,\n    'source_dataset': {\n        \"files\": 'data/giga/valid.article',\n        'vocab_file': source_vocab_file,\n    },\n    'target_dataset': {\n        'files': 'data/giga/valid.title',\n        'vocab_file': target_vocab_file,\n    }\n}\ntest = {\n    'batch_size': batch_size,\n    'shuffle': False,\n    'allow_smaller_final_batch': True,\n    'source_dataset': {\n        \"files\": 'data/giga/test.article',\n        'vocab_file': source_vocab_file,\n    },\n    'target_dataset': {\n        'files': 'data/giga/test.title',\n        'vocab_file': target_vocab_file,\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/configs/config_iwslt14.py",
    "content": "num_epochs = 50 # the best epoch occurs within 10 epochs in most cases\nobserve_steps = 500\n\neval_metric = 'bleu'\n\nbatch_size = 64\nsource_vocab_file = './data/iwslt14/vocab.de'\ntarget_vocab_file = './data/iwslt14/vocab.en'\n\ntrain = {\n    'batch_size': batch_size,\n    'shuffle': True,\n    'allow_smaller_final_batch': False,\n    'source_dataset': {\n        \"files\": 'data/iwslt14/train.de',\n        'vocab_file': source_vocab_file,\n        'max_seq_length': 50\n    },\n    'target_dataset': {\n        'files': 'data/iwslt14/train.en',\n        'vocab_file': target_vocab_file,\n        'max_seq_length': 50\n    }\n}\nval = {\n    'batch_size': batch_size,\n    'shuffle': False,\n    'allow_smaller_final_batch': True,\n    'source_dataset': {\n        \"files\": 'data/iwslt14/valid.de',\n        'vocab_file': source_vocab_file,\n    },\n    'target_dataset': {\n        'files': 'data/iwslt14/valid.en',\n        'vocab_file': target_vocab_file,\n    }\n}\ntest = {\n    'batch_size': batch_size,\n    'shuffle': False,\n    'allow_smaller_final_batch': True,\n    'source_dataset': {\n        \"files\": 'data/iwslt14/test.de',\n        'vocab_file': source_vocab_file,\n    },\n    'target_dataset': {\n        'files': 'data/iwslt14/test.en',\n        'vocab_file': target_vocab_file,\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/configs/config_model.py",
    "content": "num_units = 256\nbeam_width = 5\ndecoder_layers = 1\ndropout = 0.2\n\nembedder = {\n    'dim': num_units\n}\nencoder = {\n    'rnn_cell_fw': {\n        'kwargs': {\n            'num_units': num_units\n        },\n        'dropout': {\n            'input_keep_prob': 1. - dropout\n        }\n    }\n}\ndecoder = {\n    'rnn_cell': {\n        'kwargs': {\n            'num_units': num_units\n        },\n        'dropout': {\n            'input_keep_prob': 1. - dropout\n        },\n        'num_layers': decoder_layers\n    },\n    'attention': {\n        'kwargs': {\n            'num_units': num_units,\n        },\n        'attention_layer_size': num_units\n    }\n}\nopt = {\n    'optimizer': {\n        'type':  'AdamOptimizer',\n        'kwargs': {\n            'learning_rate': 0.001,\n        },\n    },\n}"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/interpolation_decoder.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nInterpolation Decoder is used for interpolation algorithm\nwhich stores one more variable in 'state' recording the\ndecoded ids(state: [decoded_ids, rnn_state]).\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=no-name-in-module, too-many-arguments, too-many-locals\n# pylint: disable=not-context-manager, protected-access, invalid-name\n\nimport tensorflow as tf\n\nfrom texar.modules.decoders.rnn_decoders import \\\n    AttentionRNNDecoder, AttentionRNNDecoderOutput\n\n\nclass InterpolationDecoder(AttentionRNNDecoder):\n    \"\"\"\n    Basically the same as AttentionRNNDecoder except for one\n    more variable besides rnn_state in 'state' recording the\n    decoded ids(state: [decoded_ids, rnn_state])\n\n    Args:\n        memory: The memory to query, e.g., the output of an RNN encoder. This\n            tensor should be shaped `[batch_size, max_time, dim]`.\n        memory_sequence_length (optional): A tensor of shape `[batch_size]`\n            containing the sequence lengths for the batch\n            entries in memory. If provided, the memory tensor rows are masked\n            with zeros for values past the respective sequence lengths.\n        cell (RNNCell, optional): An instance of `RNNCell`. 
If `None`, a cell\n            is created as specified in :attr:`hparams`.\n        cell_dropout_mode (optional): A Tensor taking value of\n            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, which\n            toggles dropout in the RNN cell (e.g., activates dropout in\n            TRAIN mode). If `None`, :func:`~texar.global_mode` is used.\n            Ignored if :attr:`cell` is given.\n        vocab_size (int, optional): Vocabulary size. Required if\n            :attr:`output_layer` is `None`.\n        output_layer (optional): An instance of\n            :tf_main:`tf.layers.Layer <layers/Layer>`, or\n            :tf_main:`tf.identity <identity>`. Apply to the RNN cell\n            output to get logits. If `None`, a dense layer\n            is used with output dimension set to :attr:`vocab_size`.\n            Set `output_layer=tf.identity` if you do not want to have an\n            output layer after the RNN cell outputs.\n        cell_input_fn (callable, optional): A callable that produces RNN cell\n            inputs. If `None` (default), the default is used:\n            `lambda inputs, attention: tf.concat([inputs, attention], -1)`,\n            which concatenates regular RNN cell inputs with attentions.\n        hparams (dict, optional): Hyperparameters. Missing\n            hyperparameters will be set to default values. 
See\n            :meth:`default_hparams` for the hyperparameter structure and\n            default values.\n    \"\"\"\n    def __init__(self,\n                 memory,\n                 memory_sequence_length=None,\n                 cell=None,\n                 cell_dropout_mode=None,\n                 vocab_size=None,\n                 output_layer=None,\n                 cell_input_fn=None,\n                 hparams=None):\n        AttentionRNNDecoder.__init__(\n            self, memory, memory_sequence_length, cell, cell_dropout_mode,\n            vocab_size, output_layer, cell_input_fn, hparams)\n\n    def initialize(self, name=None):\n        init = AttentionRNNDecoder.initialize(self, name)\n\n        batch_size = tf.shape(init[0])[0]\n\n        # decoded_ids can be initialized as any arbitrary value\n        # because it will be assigned later in decoding\n        initial_decoded_ids = tf.ones((batch_size, 60), dtype=tf.int32)\n\n        initial_rnn_state = init[2]\n        initial_state = [initial_decoded_ids, initial_rnn_state]\n        init[2] = initial_state\n\n        return init\n\n    def step(self, time, inputs, state, name=None):\n        # Basically the same as in AttentionRNNDecoder except considering\n        # the different form of 'state'(decoded_ids, rnn_state)\n\n        wrapper_outputs, wrapper_state = self._cell(inputs, state[1])\n        decoded_ids = state[0]\n\n        logits = self._output_layer(wrapper_outputs)\n\n        sample_ids = self._helper.sample(\n            time=time, outputs=logits, state=[decoded_ids, wrapper_state])\n\n        (finished, next_inputs, next_state) = self._helper.next_inputs(\n            time=time,\n            outputs=logits,\n            state=[decoded_ids, wrapper_state],\n            sample_ids=sample_ids)\n\n        attention_scores = wrapper_state.alignments\n        attention_context = wrapper_state.attention\n        outputs = AttentionRNNDecoderOutput(\n            logits, sample_ids, 
wrapper_outputs,\n            attention_scores, attention_context)\n\n        return (outputs, next_state, next_inputs, finished)\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/interpolation_helper.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nHelper for interpolation algirithm.\nNew token is sample from model, ground_truth or reward according to lambdas\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nimport numpy as np\n\nfrom tensorflow.contrib.seq2seq import SampleEmbeddingHelper\nfrom texar.evals.bleu import sentence_bleu\nfrom rouge import Rouge\n\nrouge = Rouge()\n\n\ndef calc_reward(refs, hypo, unk_id, metric):\n    \"\"\"\n    calculate the reward given hypo and refs and will return\n    bleu score if metric is 'bleu' or return\n    sum of (Rouge-1, Rouge-2, Rouge-L) if metric is 'rouge'\n    \"\"\"\n    if len(hypo) == 0 or len(refs[0]) == 0:\n        return 0.\n\n    for i in range(len(hypo)):\n        assert isinstance(hypo[i], int)\n        if hypo[i] == unk_id:\n            hypo[i] = -1\n\n    if metric == 'bleu':\n        return 0.01 * sentence_bleu(\n            references=refs, hypothesis=hypo, smooth=True)\n    else:\n        ref_str = ' '.join([str(word) for word in refs[0]])\n        hypo_str = ' '.join([str(word) for word in hypo])\n        rouge_scores = \\\n            rouge.get_scores(hyps=[hypo_str], refs=[ref_str], avg=True)\n        return sum([value['f'] for key, value in rouge_scores.items()])\n\n\nclass 
InterpolationHelper(SampleEmbeddingHelper):\n    \"\"\"\n    Helper for interpolation algorithm.\n    New token is sampled from model, ground_truth or reward according to lambdas\n\n    Args:\n        embedding: A callable that takes a vector tensor of `ids` (argmax ids),\n            or the `params` argument for `embedding_lookup`. The returned tensor\n            will be passed to the decoder input.\n        start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.\n        end_token: `int32` scalar, the token that marks end of decoding.\n        vocab: texar.Vocab, the vocabularies of training set\n        reward_metric: 'bleu' or 'rouge', the metric of reward\n        ground_truth: the ground truth in training set\n        ground_truth_length: the length of ground truth sentences\n        lambdas: 'float32' vector shaped [3], according to which\n            the way of generating the next token in training is decided\n    \"\"\"\n    def __init__(self,\n                 embedding,\n                 start_tokens,\n                 end_token,\n                 vocab,\n                 reward_metric,\n                 ground_truth,\n                 ground_truth_length,\n                 lambdas):\n        SampleEmbeddingHelper.__init__(self, embedding, start_tokens, end_token)\n\n        self._vocab = vocab\n        self._ground_truth = ground_truth\n        self._lambdas = lambdas\n        self._ground_truth_length = ground_truth_length\n        self._metric = reward_metric\n\n    def sample(self, time, outputs, state, name=None):\n        \"\"\"\n        sample tokens for next step, notice the special form\n        of 'state'([decoded_ids, rnn_state])\n        \"\"\"\n        sample_method_sampler = \\\n            tf.distributions.Categorical(probs=self._lambdas)\n        sample_method_id = sample_method_sampler.sample()\n\n        truth_feeding = lambda: tf.cond(\n            tf.less(time, tf.shape(self._ground_truth)[1]),\n            lambda: 
tf.to_int32(self._ground_truth[:, time]),\n            lambda: tf.ones_like(self._ground_truth[:, 0],\n                                 dtype=tf.int32) * self._vocab.eos_token_id)\n\n        self_feeding = lambda : SampleEmbeddingHelper.sample(\n            self, time, outputs, state, name)\n\n        reward_feeding = lambda : self._sample_by_reward(time, state)\n\n        sample_ids = tf.cond(\n            tf.logical_or(tf.equal(time, 0), tf.equal(sample_method_id, 1)),\n            truth_feeding,\n            lambda: tf.cond(\n                tf.equal(sample_method_id, 2),\n                reward_feeding,\n                self_feeding))\n        return sample_ids\n\n    def next_inputs(self, time, outputs, state, sample_ids, name=None):\n        \"\"\"\n        notice the special form of 'state'([decoded_ids, rnn_state])\n        \"\"\"\n        finished, next_inputs, next_state = SampleEmbeddingHelper.next_inputs(\n            self, time, outputs, state[1], sample_ids, name)\n\n        next_state = [tf.concat(\n            [state[0][:, :time], tf.expand_dims(sample_ids, 1),\n             state[0][:, time + 1:]], axis=1), next_state]\n        next_state[0] = tf.reshape(next_state[0], (tf.shape(sample_ids)[0], 60))\n\n        return finished, next_inputs, next_state\n\n    def _sample_by_reward(self, time, state):\n        def _get_rewards(time, prefix_ids, target_ids, ground_truth_length):\n            batch_size = np.shape(target_ids)[0]\n            words_in_target = \\\n                [np.unique(target_ids[i]) for i in range(batch_size)]\n            unk_id = self._vocab.unk_token_id\n            eos_id = self._vocab.eos_token_id\n\n            # before append\n            baseline_scores = []\n            baseline_ids = prefix_ids[:, :time]\n            for i in range(batch_size):\n                ref = target_ids[i].tolist()\n                if self._vocab.eos_token_id in ref:\n                    ref = ref[:ref.index(self._vocab.eos_token_id)]\n\n          
      hypo = baseline_ids[i].tolist()\n                if self._vocab.eos_token_id in hypo:\n                    hypo = hypo[:hypo.index(self._vocab.eos_token_id)]\n\n                baseline_scores.append(calc_reward(\n                    refs=[ref], hypo=hypo, unk_id=unk_id,\n                    metric=self._metric))\n\n            # append UNK\n            syn_ids = np.concatenate([\n                prefix_ids[:, :time],\n                np.ones((batch_size, 1), dtype=np.int32) * unk_id], axis=1)\n\n            reward_unk = []\n            for i in range(batch_size):\n                ref = target_ids[i].tolist()\n                if self._vocab.eos_token_id in ref:\n                    ref = ref[:ref.index(self._vocab.eos_token_id)]\n\n                hypo = syn_ids[i].tolist()\n                if self._vocab.eos_token_id in hypo:\n                    hypo = hypo[:hypo.index(self._vocab.eos_token_id)]\n\n                reward = calc_reward(refs=[ref], hypo=hypo, unk_id=unk_id,\n                                     metric=self._metric)\n                reward_unk.append(\n                    np.ones((1, self._vocab.size), dtype=np.float32) *\n                    reward - baseline_scores[i])\n            result = np.concatenate(reward_unk, axis=0)\n\n            # append tokens\n            for i in range(batch_size):\n                for id in words_in_target[i]:\n                    if id == unk_id:\n                        continue\n\n                    syn_id = np.concatenate(\n                        [prefix_ids[i:i + 1, :time], np.array([[id, ]])],\n                        axis=1)\n                    hypo = syn_id[0].tolist()\n                    if self._vocab.eos_token_id in hypo:\n                        hypo = hypo[:hypo.index(self._vocab.eos_token_id)]\n\n                    ref = target_ids[i].tolist()\n                    if self._vocab.eos_token_id in ref:\n                        ref = ref[:ref.index(self._vocab.eos_token_id)]\n\n                  
  dup = 1. if prefix_ids[i][time] == id and \\\n                                id != unk_id else 0.\n                    eos = 1. if time < ground_truth_length[i] - 1 and \\\n                                id == eos_id else 0.\n\n                    reward = calc_reward(\n                        refs=[ref], hypo=hypo, unk_id=unk_id,\n                        metric=self._metric)\n                    result[i][id] = reward - baseline_scores[i] - dup - eos\n\n            return result\n\n        sampler = tf.distributions.Categorical(\n            logits=tf.py_func(_get_rewards, [\n                time, state[0], self._ground_truth,\n                self._ground_truth_length], tf.float32))\n        return tf.reshape(\n            sampler.sample(), (tf.shape(self._ground_truth)[0],))"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/interpolation_main.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n\"\"\"Interpolation Algorithm.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\nfrom __future__ import unicode_literals\n\nimport importlib\nfrom io import open\n\nimport tensorflow as tf\nimport texar as tx\nimport numpy as np\n\nfrom interpolation_decoder import InterpolationDecoder\nfrom interpolation_helper import InterpolationHelper\nfrom rouge import Rouge\n\nflags = tf.flags\n\nflags.DEFINE_string(\"config_model\", \"configs.config_model\", \"The model config.\")\nflags.DEFINE_string(\"config_data\", \"configs.config_iwslt14\",\n                    \"The dataset config.\")\n\nflags.DEFINE_string('lambdas_init', '[0.04,0.96,0.0]',\n                    'initial value of lambdas')\n\nflags.DEFINE_float('delta_lambda_reward', 0.06,\n                   'increment of lambda_reward every annealing')\nflags.DEFINE_float('delta_lambda_self', 0.06,\n                   'decrement of lambda_self every annealing')\nflags.DEFINE_integer('lambda_reward_steps', 4,\n                     'times of increasing lambda_reward '\n                     'after incresing lambda_self once')\n\nflags.DEFINE_string('output_dir', '.', 'where to keep training logs')\n\nFLAGS = flags.FLAGS\n\nconfig_model = importlib.import_module(FLAGS.config_model)\nconfig_data = 
importlib.import_module(FLAGS.config_data)\n\nFLAGS.lambdas_init = eval(FLAGS.lambdas_init)\n\nif not FLAGS.output_dir.endswith('/'):\n    FLAGS.output_dir += '/'\nlog_dir = FLAGS.output_dir + 'training_log_interpolation' +\\\n          '_init' + '_' + str(FLAGS.lambdas_init[0]) +\\\n          '_' + str(FLAGS.lambdas_init[1]) +\\\n          '_' + str(FLAGS.lambdas_init[2]) +\\\n          '_dr' + str(FLAGS.delta_lambda_reward) +\\\n          '_ds' + str(FLAGS.delta_lambda_self) +\\\n          '_rstep' + str(FLAGS.lambda_reward_steps) + '/'\ntx.utils.maybe_create_dir(log_dir)\n\n\ndef build_model(batch, train_data, lambdas):\n    \"\"\"\n    This function is basically the same as build_model() in\n    baseline_seq2seq_attn.py, except the\n    InterpolateDecoder and InterpolateHelper.\n    \"\"\"\n    batch_size = tf.shape(batch['target_length'])[0]\n\n    source_embedder = tx.modules.WordEmbedder(\n        vocab_size=train_data.source_vocab.size, hparams=config_model.embedder)\n\n    encoder = tx.modules.BidirectionalRNNEncoder(\n        hparams=config_model.encoder)\n\n    enc_outputs, _ = encoder(source_embedder(batch['source_text_ids']))\n\n    target_embedder = tx.modules.WordEmbedder(\n        vocab_size=train_data.target_vocab.size, hparams=config_model.embedder)\n\n    decoder = InterpolationDecoder(\n        memory=tf.concat(enc_outputs, axis=2),\n        memory_sequence_length=batch['source_length'],\n        vocab_size=train_data.target_vocab.size,\n        hparams=config_model.decoder)\n\n    start_tokens = tf.ones_like(\n        batch['target_length']) * train_data.target_vocab.bos_token_id\n    helper = InterpolationHelper(\n        embedding=target_embedder,\n        start_tokens=start_tokens,\n        end_token=train_data.target_vocab.eos_token_id,\n        reward_metric=config_data.eval_metric,\n        vocab=train_data.target_vocab,\n        ground_truth=batch['target_text_ids'][:, 1:],\n        ground_truth_length=batch['target_length'] - 1,\n       
 lambdas=lambdas,)\n\n    training_outputs, _, training_length = decoder(\n        helper=helper,\n        initial_state=decoder.zero_state(\n            batch_size=batch_size, dtype=tf.float32),\n        max_decoding_length=60)\n\n    train_op = tx.core.get_train_op(\n        tx.losses.sequence_sparse_softmax_cross_entropy(\n            labels=training_outputs.sample_id,\n            logits=training_outputs.logits,\n            sequence_length=training_length),\n        hparams=config_model.opt)\n\n    beam_search_outputs, _, _ = \\\n        tx.modules.beam_search_decode(\n            decoder_or_cell=decoder,\n            embedding=target_embedder,\n            start_tokens=start_tokens,\n            end_token=train_data.target_vocab.eos_token_id,\n            beam_width=config_model.beam_width,\n            max_decoding_length=60)\n\n    return train_op, beam_search_outputs\n\n\ndef print_stdout_and_file(content, file):\n    print(content)\n    print(content, file=file)\n\n\ndef main():\n    \"\"\"Entrypoint.\n    \"\"\"\n    training_data = tx.data.PairedTextData(hparams=config_data.train)\n    val_data = tx.data.PairedTextData(hparams=config_data.val)\n    test_data = tx.data.PairedTextData(hparams=config_data.test)\n    data_iterator = tx.data.TrainTestDataIterator(\n        train=training_data, val=val_data, test=test_data)\n\n    batch = data_iterator.get_next()\n    lambdas_ts = tf.placeholder(shape=[3], dtype=tf.float32)\n\n    train_op, infer_outputs = build_model(batch, training_data, lambdas_ts)\n\n    def _train_epoch(sess, epoch, lambdas):\n        data_iterator.switch_to_train_data(sess)\n        log_file = open(log_dir + 'training_log' + str(epoch) + '.txt', 'w',\n                        encoding='utf-8')\n\n        step = 0\n        while True:\n            try:\n                loss = sess.run(train_op, feed_dict={\n                    lambdas_ts: np.array(lambdas)})\n                print(\"step={}, loss={:.4f}, lambdas={}\".format(\n             
       step, loss, lambdas), file=log_file)\n                if step % config_data.observe_steps == 0:\n                    print(\"step={}, loss={:.4f}, lambdas={}\".format(\n                        step, loss, lambdas))\n                log_file.flush()\n                step += 1\n\n            except tf.errors.OutOfRangeError:\n                break\n\n    def _eval_epoch(sess, mode, epoch_no):\n        \"\"\"\n        This function is the same as _eval_epoch() in\n        baseline_seq2seq_attn_main.py.\n        \"\"\"\n        if mode == 'val':\n            data_iterator.switch_to_val_data(sess)\n        else:\n            data_iterator.switch_to_test_data(sess)\n\n        refs, hypos = [], []\n        while True:\n            try:\n                fetches = [\n                    batch['target_text'][:, 1:],\n                    infer_outputs.predicted_ids[:, :, 0]\n                ]\n                feed_dict = {\n                    tx.global_mode(): tf.estimator.ModeKeys.EVAL\n                }\n                target_texts_ori, output_ids = \\\n                    sess.run(fetches, feed_dict=feed_dict)\n\n                target_texts = tx.utils.strip_special_tokens(\n                    target_texts_ori.tolist(), is_token_list=True)\n                target_texts = tx.utils.str_join(target_texts)\n                output_texts = tx.utils.map_ids_to_strs(\n                    ids=output_ids, vocab=val_data.target_vocab)\n\n                tx.utils.write_paired_text(\n                    target_texts, output_texts,\n                    log_dir + mode + '_results' + str(epoch_no) + '.txt',\n                    append=True, mode='h', sep=' ||| ')\n\n                for hypo, ref in zip(output_texts, target_texts):\n                    if config_data.eval_metric == 'bleu':\n                        hypos.append(hypo)\n                        refs.append([ref])\n                    elif config_data.eval_metric == 'rouge':\n                        
hypos.append(tx.utils.compat_as_text(hypo))\n                        refs.append(tx.utils.compat_as_text(ref))\n            except tf.errors.OutOfRangeError:\n                break\n\n        if config_data.eval_metric == 'bleu':\n            return tx.evals.corpus_bleu_moses(\n                list_of_references=refs, hypotheses=hypos)\n        elif config_data.eval_metric == 'rouge':\n            rouge = Rouge()\n            return rouge.get_scores(hyps=hypos, refs=refs, avg=True)\n\n    def _calc_reward(score):\n        \"\"\"\n        Return the bleu score or the sum of (Rouge-1, Rouge-2, Rouge-L).\n        \"\"\"\n        if config_data.eval_metric == 'bleu':\n            return score\n        elif config_data.eval_metric == 'rouge':\n            return sum([value['f'] for key, value in score.items()])\n\n    def _anneal():\n        \"\"\"\n        Operate lambdas when the reward of val set decrease.\n        \"\"\"\n        def _update_self():\n            \"\"\"\n            Decrease lambda_truth and increase lambda_self.\n            \"\"\"\n            lambdas[1] -= FLAGS.delta_lambda_self\n            lambdas[0] += FLAGS.delta_lambda_self\n            updates.append('self')\n\n        def _update_rew():\n            \"\"\"\n            Decrease lambda_truth and increase lambda_reward.\n            \"\"\"\n            lambdas[1] -= FLAGS.delta_lambda_reward\n            lambdas[2] += FLAGS.delta_lambda_reward\n            updates.append('rew')\n\n        if updates[-FLAGS.lambda_reward_steps:] == \\\n                ['rew'] * FLAGS.lambda_reward_steps:\n            _update_self()\n        else:\n            _update_rew()\n\n    saver = tf.train.Saver(max_to_keep=2)\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        lambdas = FLAGS.lambdas_init\n        updates = ['rew'] * FLAGS.lambda_reward_steps\n\n        
best_val_score, best_val_score_current_lambdas = -1., -1.\n        scores_file = open(log_dir + 'scores.txt', 'w', encoding='utf-8')\n\n        for i in range(config_data.num_epochs):\n            print_stdout_and_file(\n                'training epoch={}, lambdas={}'.format(i, lambdas),\n                file=scores_file)\n            _train_epoch(sess, i, lambdas)\n            saver.save(sess, log_dir + 'models/model{}.ckpt'.format(i))\n\n            val_score = _eval_epoch(sess, 'val', i)\n            test_score = _eval_epoch(sess, 'test', i)\n\n            if _calc_reward(val_score) < best_val_score_current_lambdas:\n                _anneal()\n                best_val_score_current_lambdas = -1.\n                saver.restore(\n                    sess, log_dir + 'models/model{}.ckpt'.format(i - 1))\n            else:\n                best_val_score_current_lambdas = _calc_reward(val_score)\n\n            best_val_score = max(best_val_score, _calc_reward(val_score))\n\n            if config_data.eval_metric == 'bleu':\n                print_stdout_and_file(\n                    'val epoch={}, BLEU={:.4f}; best-ever={:.4f}'.format(\n                        i, val_score, best_val_score), file=scores_file)\n\n                print_stdout_and_file(\n                    'test epoch={}, BLEU={:.4f}'.format(i, test_score),\n                    file=scores_file)\n                print_stdout_and_file('=' * 50, file=scores_file)\n\n            elif config_data.eval_metric == 'rouge':\n                print_stdout_and_file(\n                    'valid epoch {}:'.format(i), file=scores_file)\n                for key, value in val_score.items():\n                    print_stdout_and_file(\n                        '{}: {}'.format(key, value), file=scores_file)\n                print_stdout_and_file('fsum: {}; best_val_fsum: {}'.format(\n                    _calc_reward(val_score), best_val_score), file=scores_file)\n\n                print_stdout_and_file(\n                  
  'test epoch {}:'.format(i), file=scores_file)\n                for key, value in test_score.items():\n                    print_stdout_and_file(\n                        '{}: {}'.format(key, value), file=scores_file)\n                print_stdout_and_file('=' * 110, file=scores_file)\n\n            scores_file.flush()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/raml_main.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n\"\"\"\nAttentional Seq2seq with RAML algorithm.\n\nRead a pre-processed file containing the augmented samples and\ncorresponding rewards for every target sentence.\n\nRAML Algorithm is described in https://arxiv.org/pdf/1705.07136.pdf\n\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\nfrom __future__ import unicode_literals\n\nfrom io import open\nimport importlib\nimport tensorflow as tf\nimport texar as tx\nimport numpy as np\nimport random\nfrom rouge import Rouge\n\nflags = tf.flags\n\nflags.DEFINE_string(\"config_model\", \"configs.config_model\", \"The model config.\")\nflags.DEFINE_string(\"config_data\", \"configs.config_iwslt14\",\n                    \"The dataset config.\")\n\nflags.DEFINE_string('raml_file', 'data/iwslt14/samples_iwslt14.txt',\n                    'the samples and rewards described in RAML')\nflags.DEFINE_integer('n_samples', 10,\n                     'number of samples for every target sentence')\nflags.DEFINE_float('tau', 0.4, 'the temperature in RAML algorithm')\n\nflags.DEFINE_string('output_dir', '.', 'where to keep training logs')\n\nFLAGS = flags.FLAGS\n\nconfig_model = importlib.import_module(FLAGS.config_model)\nconfig_data = importlib.import_module(FLAGS.config_data)\n\nif not FLAGS.output_dir.endswith('/'):\n    
FLAGS.output_dir += '/'\nlog_dir = FLAGS.output_dir + 'training_log_raml' +\\\n          '_' + str(FLAGS.n_samples) + 'samples' +\\\n          '_tau' + str(FLAGS.tau) + '/'\ntx.utils.maybe_create_dir(log_dir)\n\n\ndef read_raml_sample_file():\n    raml_file = open(FLAGS.raml_file, encoding='utf-8')\n\n    train_data = []\n    sample_num = -1\n    for line in raml_file.readlines():\n        line = line[:-1]\n        if line.startswith('***'):\n            continue\n        elif line.endswith('samples'):\n            sample_num = eval(line.split()[0])\n            assert sample_num == 1 or sample_num == FLAGS.n_samples\n        elif line.startswith('source:'):\n            train_data.append({'source': line[7:], 'targets': []})\n        else:\n            train_data[-1]['targets'].append(line.split('|||'))\n            if sample_num == 1:\n                for i in range(FLAGS.n_samples - 1):\n                    train_data[-1]['targets'].append(line.split('|||'))\n    return train_data\n\n\ndef raml_loss(batch, output, training_rewards):\n    mle_loss = tx.losses.sequence_sparse_softmax_cross_entropy(\n        labels=batch['target_text_ids'][:, 1:],\n        logits=output.logits,\n        sequence_length=batch['target_length'] - 1,\n        average_across_batch=False)\n    return tf.reduce_sum(mle_loss * training_rewards) /\\\n           tf.reduce_sum(training_rewards)\n\n\ndef build_model(batch, train_data, rewards):\n    \"\"\"\n    Assembles the seq2seq model.\n    Code in this function is basically the same of build_model() in\n    baseline_seq2seq_attn_main.py except the normalization in loss_fn.\n    \"\"\"\n    source_embedder = tx.modules.WordEmbedder(\n        vocab_size=train_data.source_vocab.size, hparams=config_model.embedder)\n\n    encoder = tx.modules.BidirectionalRNNEncoder(\n        hparams=config_model.encoder)\n\n    enc_outputs, _ = encoder(source_embedder(batch['source_text_ids']))\n\n    target_embedder = tx.modules.WordEmbedder(\n        
vocab_size=train_data.target_vocab.size, hparams=config_model.embedder)\n\n    decoder = tx.modules.AttentionRNNDecoder(\n        memory=tf.concat(enc_outputs, axis=2),\n        memory_sequence_length=batch['source_length'],\n        vocab_size=train_data.target_vocab.size,\n        hparams=config_model.decoder)\n\n    training_outputs, _, _ = decoder(\n        decoding_strategy='train_greedy',\n        inputs=target_embedder(batch['target_text_ids'][:, :-1]),\n        sequence_length=batch['target_length'] - 1)\n\n    train_op = tx.core.get_train_op(\n        raml_loss(batch, training_outputs, rewards),\n        hparams=config_model.opt)\n\n    start_tokens = tf.ones_like(batch['target_length']) *\\\n                   train_data.target_vocab.bos_token_id\n    beam_search_outputs, _, _ = \\\n        tx.modules.beam_search_decode(\n            decoder_or_cell=decoder,\n            embedding=target_embedder,\n            start_tokens=start_tokens,\n            end_token=train_data.target_vocab.eos_token_id,\n            beam_width=config_model.beam_width,\n            max_decoding_length=60)\n\n    return train_op, beam_search_outputs\n\n\ndef print_stdout_and_file(content, file):\n    print(content)\n    print(content, file=file)\n\n\ndef main():\n    \"\"\"Entrypoint.\n    \"\"\"\n    config_data.train['batch_size'] *= FLAGS.n_samples\n    config_data.val['batch_size'] *= FLAGS.n_samples\n    config_data.test['batch_size'] *= FLAGS.n_samples\n\n    train_data = tx.data.PairedTextData(hparams=config_data.train)\n    val_data = tx.data.PairedTextData(hparams=config_data.val)\n    test_data = tx.data.PairedTextData(hparams=config_data.test)\n    data_iterator = tx.data.TrainTestDataIterator(\n        train=train_data, val=val_data, test=test_data)\n\n    batch = data_iterator.get_next()\n    rewards_ts = tf.placeholder(\n        dtype=tf.float32, shape=[None, ], name='training_rewards')\n\n    train_op, infer_outputs = build_model(batch, train_data, rewards_ts)\n\n   
 raml_train_data = read_raml_sample_file()\n\n    def _train_epoch(sess, epoch_no):\n        data_iterator.switch_to_train_data(sess)\n        training_log_file = \\\n            open(log_dir + 'training_log' + str(epoch_no) + '.txt', 'w',\n                 encoding='utf-8')\n\n        step = 0\n        source_buffer, target_buffer = [], []\n        random.shuffle(raml_train_data)\n        for training_pair in raml_train_data:\n            for target in training_pair['targets']:\n                source_buffer.append(training_pair['source'])\n                target_buffer.append(target)\n\n            if len(target_buffer) != train_data.batch_size:\n                continue\n\n            source_ids = []\n            source_length = []\n            target_ids = []\n            target_length = []\n            scores = []\n\n            trunc_len_src = train_data.hparams.source_dataset.max_seq_length\n            trunc_len_tgt = train_data.hparams.target_dataset.max_seq_length\n\n            for sentence in source_buffer:\n                ids = [train_data.source_vocab.token_to_id_map_py[token]\n                       for token in sentence.split()][:trunc_len_src]\n                ids = ids + [train_data.source_vocab.eos_token_id]\n\n                source_ids.append(ids)\n                source_length.append(len(ids))\n\n            for sentence, score_str in target_buffer:\n                ids = [train_data.target_vocab.bos_token_id]\n                ids = ids + [train_data.target_vocab.token_to_id_map_py[token]\n                             for token in sentence.split()][:trunc_len_tgt]\n                ids = ids + [train_data.target_vocab.eos_token_id]\n\n                target_ids.append(ids)\n                scores.append(eval(score_str))\n                target_length.append(len(ids))\n\n            rewards = []\n            for i in range(0, train_data.batch_size, FLAGS.n_samples):\n                tmp = np.array(scores[i:i + FLAGS.n_samples])\n                
tmp = np.exp(tmp / FLAGS.tau) / np.sum(np.exp(tmp / FLAGS.tau))\n                for j in range(0, FLAGS.n_samples):\n                    rewards.append(tmp[j])\n\n            for value in source_ids:\n                while len(value) < max(source_length):\n                    value.append(0)\n            for value in target_ids:\n                while len(value) < max(target_length):\n                    value.append(0)\n\n            feed_dict = {\n                batch['source_text_ids']: np.array(source_ids),\n                batch['target_text_ids']: np.array(target_ids),\n                batch['source_length']: np.array(source_length),\n                batch['target_length']: np.array(target_length),\n                rewards_ts: np.array(rewards)\n            }\n            source_buffer = []\n            target_buffer = []\n\n            loss = sess.run(train_op, feed_dict=feed_dict)\n            print(\"step={}, loss={:.4f}\".format(step, loss),\n                  file=training_log_file)\n            if step % config_data.observe_steps == 0:\n                print(\"step={}, loss={:.4f}\".format(step, loss))\n            training_log_file.flush()\n            step += 1\n\n    # code below this line is exactly the same as baseline_seq2seq_attn_main.py\n\n    def _eval_epoch(sess, mode, epoch_no):\n        if mode == 'val':\n            data_iterator.switch_to_val_data(sess)\n        else:\n            data_iterator.switch_to_test_data(sess)\n\n        refs, hypos = [], []\n        while True:\n            try:\n                fetches = [\n                    batch['target_text'][:, 1:],\n                    infer_outputs.predicted_ids[:, :, 0]\n                ]\n                feed_dict = {\n                    tx.global_mode(): tf.estimator.ModeKeys.EVAL\n                }\n                target_texts_ori, output_ids = \\\n                    sess.run(fetches, feed_dict=feed_dict)\n\n                target_texts = tx.utils.strip_special_tokens(\n        
            target_texts_ori.tolist(), is_token_list=True)\n                target_texts = tx.utils.str_join(target_texts)\n                output_texts = tx.utils.map_ids_to_strs(\n                    ids=output_ids, vocab=val_data.target_vocab)\n\n                tx.utils.write_paired_text(\n                    target_texts, output_texts,\n                    log_dir + mode + '_results' + str(epoch_no) + '.txt',\n                    append=True, mode='h', sep=' ||| ')\n\n                for hypo, ref in zip(output_texts, target_texts):\n                    if config_data.eval_metric == 'bleu':\n                        hypos.append(hypo)\n                        refs.append([ref])\n                    elif config_data.eval_metric == 'rouge':\n                        hypos.append(tx.utils.compat_as_text(hypo))\n                        refs.append(tx.utils.compat_as_text(ref))\n            except tf.errors.OutOfRangeError:\n                break\n\n        if config_data.eval_metric == 'bleu':\n            return tx.evals.corpus_bleu_moses(\n                list_of_references=refs, hypotheses=hypos)\n        elif config_data.eval_metric == 'rouge':\n            rouge = Rouge()\n            return rouge.get_scores(hyps=hypos, refs=refs, avg=True)\n\n    def _calc_reward(score):\n        \"\"\"\n        Return the bleu score or the sum of (Rouge-1, Rouge-2, Rouge-L).\n        \"\"\"\n        if config_data.eval_metric == 'bleu':\n            return score\n        elif config_data.eval_metric == 'rouge':\n            return sum([value['f'] for key, value in score.items()])\n\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        best_val_score = -1.\n        scores_file = open(log_dir + 'scores.txt', 'w', encoding='utf-8')\n        for i in range(config_data.num_epochs):\n            _train_epoch(sess, i)\n\n            val_score = 
_eval_epoch(sess, 'val', i)\n            test_score = _eval_epoch(sess, 'test', i)\n\n            best_val_score = max(best_val_score, _calc_reward(val_score))\n\n            if config_data.eval_metric == 'bleu':\n                print_stdout_and_file(\n                    'val epoch={}, BLEU={:.4f}; best-ever={:.4f}'.format(\n                        i, val_score, best_val_score), file=scores_file)\n\n                print_stdout_and_file(\n                    'test epoch={}, BLEU={:.4f}'.format(i, test_score),\n                    file=scores_file)\n                print_stdout_and_file('=' * 50, file=scores_file)\n\n            elif config_data.eval_metric == 'rouge':\n                print_stdout_and_file(\n                    'valid epoch {}:'.format(i), file=scores_file)\n                for key, value in val_score.items():\n                    print_stdout_and_file(\n                        '{}: {}'.format(key, value), file=scores_file)\n                print_stdout_and_file('fsum: {}; best_val_fsum: {}'.format(\n                    _calc_reward(val_score), best_val_score), file=scores_file)\n\n                print_stdout_and_file(\n                    'test epoch {}:'.format(i), file=scores_file)\n                for key, value in test_score.items():\n                    print_stdout_and_file(\n                        '{}: {}'.format(key, value), file=scores_file)\n                print_stdout_and_file('=' * 110, file=scores_file)\n\n            scores_file.flush()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/requirements.txt",
    "content": "rouge==0.2.1"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/scheduled_sampling_main.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nAttentional Seq2seq using Scheduled sampling algorithm.\n\nThis code is basically the same as baseline_seq2seq_attn_main.py,\nexcept using ScheduledEmbeddingTrainingHelper.\n\nScheduled Sampling Algorithm is described in https://arxiv.org/abs/1506.03099\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\nfrom __future__ import unicode_literals\n\n# pylint: disable=invalid-name, too-many-arguments, too-many-locals\n\nfrom io import open\nimport math\nimport importlib\nimport tensorflow as tf\nimport texar as tx\nfrom rouge import Rouge\n\nflags = tf.flags\n\nflags.DEFINE_string(\"config_model\", \"configs.config_model\", \"The model config.\")\nflags.DEFINE_string(\"config_data\", \"configs.config_iwslt14\",\n                    \"The dataset config.\")\n\nflags.DEFINE_float('decay_factor', 500.,\n                   'The hyperparameter controling the speed of increasing '\n                   'the probability of sampling from model')\n\nflags.DEFINE_string('output_dir', '.', 'where to keep training logs')\n\nFLAGS = flags.FLAGS\n\nconfig_model = importlib.import_module(FLAGS.config_model)\nconfig_data = importlib.import_module(FLAGS.config_data)\n\nif not FLAGS.output_dir.endswith('/'):\n    FLAGS.output_dir += '/'\nlog_dir = FLAGS.output_dir + 
'training_log_scheduled_sampling' +\\\n          '_decayf' + str(FLAGS.decay_factor) + '/'\ntx.utils.maybe_create_dir(log_dir)\n\n\ndef inverse_sigmoid(i):\n    return FLAGS.decay_factor / (\n            FLAGS.decay_factor + math.exp(i / FLAGS.decay_factor))\n\n\ndef build_model(batch, train_data, self_sampling_proba):\n    \"\"\"\n    Assembles the seq2seq model.\n    It is the same as build_model() in baseline_seq2seq_attn.py except\n    using ScheduledEmbeddingTrainingHelper.\n    \"\"\"\n    source_embedder = tx.modules.WordEmbedder(\n        vocab_size=train_data.source_vocab.size, hparams=config_model.embedder)\n\n    encoder = tx.modules.BidirectionalRNNEncoder(\n        hparams=config_model.encoder)\n\n    enc_outputs, _ = encoder(source_embedder(batch['source_text_ids']))\n\n    target_embedder = tx.modules.WordEmbedder(\n        vocab_size=train_data.target_vocab.size, hparams=config_model.embedder)\n\n    decoder = tx.modules.AttentionRNNDecoder(\n        memory=tf.concat(enc_outputs, axis=2),\n        memory_sequence_length=batch['source_length'],\n        vocab_size=train_data.target_vocab.size,\n        hparams=config_model.decoder)\n\n    helper = tx.modules.get_helper(\n        helper_type='ScheduledEmbeddingTrainingHelper',\n        inputs=target_embedder(batch['target_text_ids'][:, :-1]),\n        sequence_length=batch['target_length'] - 1,\n        embedding=target_embedder,\n        sampling_probability=self_sampling_proba)\n\n    training_outputs, _, _ = decoder(\n        helper=helper, initial_state=decoder.zero_state(\n            batch_size=tf.shape(batch['target_length'])[0], dtype=tf.float32))\n\n    train_op = tx.core.get_train_op(\n        tx.losses.sequence_sparse_softmax_cross_entropy(\n            labels=batch['target_text_ids'][:, 1:],\n            logits=training_outputs.logits,\n            sequence_length=batch['target_length'] - 1),\n        hparams=config_model.opt)\n\n    start_tokens = tf.ones_like(batch['target_length']) 
*\\\n                   train_data.target_vocab.bos_token_id\n    beam_search_outputs, _, _ = \\\n        tx.modules.beam_search_decode(\n            decoder_or_cell=decoder,\n            embedding=target_embedder,\n            start_tokens=start_tokens,\n            end_token=train_data.target_vocab.eos_token_id,\n            beam_width=config_model.beam_width,\n            max_decoding_length=60)\n\n    return train_op, beam_search_outputs\n\n\ndef print_stdout_and_file(content, file):\n    print(content)\n    print(content, file=file)\n\n\ndef main():\n    \"\"\"Entrypoint.\n    \"\"\"\n    train_data = tx.data.PairedTextData(hparams=config_data.train)\n    val_data = tx.data.PairedTextData(hparams=config_data.val)\n    test_data = tx.data.PairedTextData(hparams=config_data.test)\n    data_iterator = tx.data.TrainTestDataIterator(\n        train=train_data, val=val_data, test=test_data)\n\n    batch = data_iterator.get_next()\n\n    self_sampling_proba = tf.placeholder(shape=[], dtype=tf.float32)\n    train_op, infer_outputs = \\\n        build_model(batch, train_data, self_sampling_proba)\n\n    def _train_epoch(sess, epoch_no, total_step_counter):\n        data_iterator.switch_to_train_data(sess)\n        training_log_file = \\\n            open(log_dir + 'training_log' + str(epoch_no) + '.txt', 'w',\n                 encoding='utf-8')\n\n        step = 0\n        while True:\n            try:\n                sampling_proba_ = 1. 
- inverse_sigmoid(total_step_counter)\n                loss = sess.run(train_op, feed_dict={\n                    self_sampling_proba: sampling_proba_})\n                print(\"step={}, loss={:.4f}, self_proba={}\".format(\n                    step, loss, sampling_proba_), file=training_log_file)\n                if step % config_data.observe_steps == 0:\n                    print(\"step={}, loss={:.4f}, self_proba={}\".format(\n                        step, loss, sampling_proba_))\n                training_log_file.flush()\n                step += 1\n                total_step_counter += 1\n            except tf.errors.OutOfRangeError:\n                break\n\n    # code below this line is exactly the same as baseline_seq2seq_attn_main.py\n\n    def _eval_epoch(sess, mode, epoch_no):\n        if mode == 'val':\n            data_iterator.switch_to_val_data(sess)\n        else:\n            data_iterator.switch_to_test_data(sess)\n\n        refs, hypos = [], []\n        while True:\n            try:\n                fetches = [\n                    batch['target_text'][:, 1:],\n                    infer_outputs.predicted_ids[:, :, 0]\n                ]\n                feed_dict = {\n                    tx.global_mode(): tf.estimator.ModeKeys.EVAL\n                }\n                target_texts_ori, output_ids = \\\n                    sess.run(fetches, feed_dict=feed_dict)\n\n                target_texts = tx.utils.strip_special_tokens(\n                    target_texts_ori.tolist(), is_token_list=True)\n                target_texts = tx.utils.str_join(target_texts)\n                output_texts = tx.utils.map_ids_to_strs(\n                    ids=output_ids, vocab=val_data.target_vocab)\n\n                tx.utils.write_paired_text(\n                    target_texts, output_texts,\n                    log_dir + mode + '_results' + str(epoch_no) + '.txt',\n                    append=True, mode='h', sep=' ||| ')\n\n                for hypo, ref in 
zip(output_texts, target_texts):\n                    if config_data.eval_metric == 'bleu':\n                        hypos.append(hypo)\n                        refs.append([ref])\n                    elif config_data.eval_metric == 'rouge':\n                        hypos.append(tx.utils.compat_as_text(hypo))\n                        refs.append(tx.utils.compat_as_text(ref))\n            except tf.errors.OutOfRangeError:\n                break\n\n        if config_data.eval_metric == 'bleu':\n            return tx.evals.corpus_bleu_moses(\n                list_of_references=refs, hypotheses=hypos)\n        elif config_data.eval_metric == 'rouge':\n            rouge = Rouge()\n            return rouge.get_scores(hyps=hypos, refs=refs, avg=True)\n\n    def _calc_reward(score):\n        \"\"\"\n        Return the bleu score or the sum of (Rouge-1, Rouge-2, Rouge-L).\n        \"\"\"\n        if config_data.eval_metric == 'bleu':\n            return score\n        elif config_data.eval_metric == 'rouge':\n            return sum([value['f'] for key, value in score.items()])\n\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        best_val_score = -1.\n        total_step_counter = 1\n        scores_file = open(log_dir + 'scores.txt', 'w', encoding='utf-8')\n        for i in range(config_data.num_epochs):\n            _train_epoch(sess, i, total_step_counter)\n\n            val_score = _eval_epoch(sess, 'val', i)\n            test_score = _eval_epoch(sess, 'test', i)\n\n            best_val_score = max(best_val_score, _calc_reward(val_score))\n\n            if config_data.eval_metric == 'bleu':\n                print_stdout_and_file(\n                    'val epoch={}, BLEU={:.4f}; best-ever={:.4f}'.format(\n                        i, val_score, best_val_score), file=scores_file)\n\n                print_stdout_and_file(\n                  
  'test epoch={}, BLEU={:.4f}'.format(i, test_score),\n                    file=scores_file)\n                print_stdout_and_file('=' * 50, file=scores_file)\n\n            elif config_data.eval_metric == 'rouge':\n                print_stdout_and_file(\n                    'valid epoch {}:'.format(i), file=scores_file)\n                for key, value in val_score.items():\n                    print_stdout_and_file(\n                        '{}: {}'.format(key, value), file=scores_file)\n                print_stdout_and_file('fsum: {}; best_val_fsum: {}'.format(\n                    _calc_reward(val_score), best_val_score), file=scores_file)\n\n                print_stdout_and_file(\n                    'test epoch {}:'.format(i), file=scores_file)\n                for key, value in test_score.items():\n                    print_stdout_and_file(\n                        '{}: {}'.format(key, value), file=scores_file)\n                print_stdout_and_file('=' * 110, file=scores_file)\n\n\n            scores_file.flush()\n\n\nif __name__ == '__main__':\n    main()\n\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/utils/prepare_data.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Downloads data.\n\"\"\"\nimport tensorflow as tf\nimport texar as tx\n\n# pylint: disable=invalid-name\n\nflags = tf.flags\n\nflags.DEFINE_string(\"data\", \"iwslt14\", \"Data to download [iwslt14|toy_copy]\")\n\nFLAGS = flags.FLAGS\n\n\ndef prepare_data():\n    \"\"\"Downloads data.\n    \"\"\"\n    if FLAGS.data == 'giga':\n        tx.data.maybe_download(\n            urls='https://drive.google.com/file/d/'\n                 '12RZs7QFwjj6dfuYNQ_0Ah-ccH1xFDMD5/view?usp=sharing',\n            path='./',\n            filenames='giga.zip',\n            extract=True)\n    elif FLAGS.data == 'iwslt14':\n        tx.data.maybe_download(\n            urls='https://drive.google.com/file/d/'\n                 '1y4mUWXRS2KstgHopCS9koZ42ENOh6Yb9/view?usp=sharing',\n            path='./',\n            filenames='iwslt14.zip',\n            extract=True)\n    else:\n        raise ValueError('Unknown data: {}'.format(FLAGS.data))\n\n\ndef main():\n    \"\"\"Entrypoint.\n    \"\"\"\n    prepare_data()\n\n\nif __name__ == '__main__':\n    main()"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/utils/raml_samples_generation/README.md",
    "content": "## Augmented Data Generation for RAML Algorithm\n\nCodes here are mainly copied from [pcyin's github](https://github.com/pcyin/pytorch_nmt), with slightly change for supporting ```rouge``` as reward. Note that we have also provided generated samples in the datasets that you can download.\n\nYou may tune hyperparameters in ```gen_samples_giga.sh```  or  ```gen_samples_iwslt14.sh``` and use commands like ```bash gen_samples_giga.sh``` to begin your generation.\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/utils/raml_samples_generation/gen_samples_giga.sh",
    "content": "#!/bin/sh\n\ntrain_src=\"../../data/giga/train.article\"\ntrain_tgt=\"../../data/giga/train.title\"\n\npython vocab.py \\\n\t--src_vocab_size 30424 \\\n\t--tgt_vocab_size 23738 \\\n\t--train_src ${train_src} \\\n\t--train_tgt ${train_tgt} \\\n\t--include_singleton \\\n\t--output giga_vocab.bin \n\npython process_samples.py \\\n    --mode sample_ngram \\\n    --vocab giga_vocab.bin \\\n    --src ${train_src} \\\n    --tgt ${train_tgt} \\\n\t--sample_size 10 \\\n\t--reward rouge \\\n    --output samples_giga.txt\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/utils/raml_samples_generation/gen_samples_iwslt14.sh",
    "content": "#!/bin/sh\n\ntrain_src=\"../../data/iwslt14/train.de\"\ntrain_tgt=\"../../data/iwslt14/train.en\"\n\npython vocab.py \\\n\t--src_vocab_size 32007 \\\n\t--tgt_vocab_size 22820 \\\n\t--train_src ${train_src} \\\n\t--train_tgt ${train_tgt} \\\n\t--include_singleton \\\n\t--output iwslt14_vocab.bin \n\npython process_samples.py \\\n    --mode sample_ngram \\\n    --vocab iwslt14_vocab.bin \\\n    --src ${train_src} \\\n    --tgt ${train_tgt} \\\n\t--sample_size 10 \\\n\t--reward bleu \\\n    --output samples_iwslt14.txt\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/utils/raml_samples_generation/process_samples.py",
    "content": "from __future__ import print_function\nfrom nltk.translate.bleu_score import sentence_bleu\nfrom nltk.translate.bleu_score import SmoothingFunction\nimport sys\nimport re\nimport argparse\nimport torch\nfrom util import read_corpus\nimport numpy as np\nfrom scipy.misc import comb\nfrom vocab import Vocab, VocabEntry\nimport math\nfrom rouge import Rouge\n\n\ndef is_valid_sample(sent):\n    tokens = sent.split(' ')\n    return len(tokens) >= 1 and len(tokens) < 50\n\n\ndef sample_from_model(args):\n    para_data = args.parallel_data\n    sample_file = args.sample_file\n    output = args.output\n\n    tgt_sent_pattern = re.compile('^\\[(\\d+)\\] (.*?)$')\n    para_data = [l.strip().split(' ||| ') for l in open(para_data)]\n\n    f_out = open(output, 'w')\n    f = open(sample_file)\n    f.readline()\n    for src_sent, tgt_sent in para_data:\n        line = f.readline().strip()\n        assert line.startswith('****')\n        line = f.readline().strip()\n        print(line)\n        assert line.startswith('target:')\n\n        tgt_sent2 = line[len('target:'):]\n        assert tgt_sent == tgt_sent2\n\n        line = f.readline().strip() # samples\n\n        tgt_sent = ' '.join(tgt_sent.split(' ')[1:-1])\n        tgt_samples = set()\n        for i in range(1, 101):\n            line = f.readline().rstrip('\\n')\n            m = tgt_sent_pattern.match(line)\n\n            assert m, line\n            assert int(m.group(1)) == i\n\n            sampled_tgt_sent = m.group(2).strip()\n\n            if is_valid_sample(sampled_tgt_sent):\n                tgt_samples.add(sampled_tgt_sent)\n\n        line = f.readline().strip()\n        assert line.startswith('****')\n\n        tgt_samples.add(tgt_sent)\n        tgt_samples = list(tgt_samples)\n\n        assert len(tgt_samples) > 0\n\n        tgt_ref_tokens = tgt_sent.split(' ')\n        bleu_scores = []\n        for tgt_sample in tgt_samples:\n            bleu_score = sentence_bleu([tgt_ref_tokens], 
tgt_sample.split(' '))\n            bleu_scores.append(bleu_score)\n\n        tgt_ranks = sorted(range(len(tgt_samples)), key=lambda i: bleu_scores[i], reverse=True)\n\n        print('%d samples' % len(tgt_samples))\n\n        print('*' * 50, file=f_out)\n        print('source: ' + src_sent, file=f_out)\n        print('%d samples' % len(tgt_samples), file=f_out)\n        for i in tgt_ranks:\n            print('%s ||| %f' % (tgt_samples[i], bleu_scores[i]), file=f_out)\n        print('*' * 50, file=f_out)\n\n    f_out.close()\n\n\ndef get_new_ngram(ngram, n, vocab):\n    \"\"\"\n    replace ngram `ngram` with a newly sampled ngram of the same length\n    \"\"\"\n\n    new_ngram_wids = [np.random.randint(3, len(vocab)) for i in range(n)]\n    new_ngram = [vocab.id2word[wid] for wid in new_ngram_wids]\n\n    return new_ngram\n\n\ndef sample_ngram(args):\n    src_sents = read_corpus(args.src, 'src')\n    tgt_sents = read_corpus(args.tgt, 'src')  # do not read in <s> and </s>\n    f_out = open(args.output, 'w')\n\n    vocab = torch.load(args.vocab)\n    tgt_vocab = vocab.tgt\n\n    smooth_bleu = args.smooth_bleu\n    sm_func = None\n    if smooth_bleu:\n        sm_func = SmoothingFunction().method3\n\n    for src_sent, tgt_sent in zip(src_sents, tgt_sents):\n        src_sent = ' '.join(src_sent)\n\n        tgt_len = len(tgt_sent)\n        tgt_samples = []\n        tgt_samples_distort_rates = []    # how many unigrams are replaced\n\n        # generate 100 samples\n\n        # append itself\n        tgt_samples.append(tgt_sent)\n        tgt_samples_distort_rates.append(0)\n\n        for sid in range(args.sample_size - 1):\n            n = np.random.randint(1, min(tgt_len, args.max_ngram_size + 1)) # we do not replace the last token: it must be a period!\n\n            idx = np.random.randint(tgt_len - n)\n            ngram = tgt_sent[idx: idx+n]\n            new_ngram = get_new_ngram(ngram, n, tgt_vocab)\n\n            sampled_tgt_sent = list(tgt_sent)\n            
sampled_tgt_sent[idx: idx+n] = new_ngram\n\n            # compute the probability of this sample\n            # prob = 1. / args.max_ngram_size * 1. / (tgt_len - 1 + n) * 1 / (len(tgt_vocab) ** n)\n\n            tgt_samples.append(sampled_tgt_sent)\n            tgt_samples_distort_rates.append(n)\n\n        # compute bleu scores or edit distances and rank the samples by bleu scores\n        rewards = []\n        for tgt_sample, tgt_sample_distort_rate in zip(tgt_samples, tgt_samples_distort_rates):\n            if args.reward == 'bleu':\n                reward = sentence_bleu([tgt_sent], tgt_sample, smoothing_function=sm_func)\n            elif args.reward == 'rouge':\n                rouge = Rouge()\n                scores = rouge.get_scores(hyps=[' '.join(tgt_sample).decode('utf-8')], refs=[' '.join(tgt_sent).decode('utf-8')], avg=True)\n                reward = sum([value['f'] for key, value in scores.items()])\n            else:\n                reward = -tgt_sample_distort_rate\n\n            rewards.append(reward)\n\n        tgt_ranks = sorted(range(len(tgt_samples)), key=lambda i: rewards[i], reverse=True)\n        # convert list of tokens into a string\n        tgt_samples = [' '.join(tgt_sample) for tgt_sample in tgt_samples]\n\n        print('*' * 50, file=f_out)\n        print('source: ' + src_sent, file=f_out)\n        print('%d samples' % len(tgt_samples), file=f_out)\n        for i in tgt_ranks:\n            print('%s ||| %f' % (tgt_samples[i], rewards[i]), file=f_out)\n        print('*' * 50, file=f_out)\n\n    f_out.close()\n\n\ndef sample_ngram_adapt(args):\n    src_sents = read_corpus(args.src, 'src')\n    tgt_sents = read_corpus(args.tgt, 'src')  # do not read in <s> and </s>\n    f_out = open(args.output, 'w')\n\n    vocab = torch.load(args.vocab)\n    tgt_vocab = vocab.tgt\n\n    max_len = max([len(tgt_sent) for tgt_sent in tgt_sents]) + 1\n\n    for src_sent, tgt_sent in zip(src_sents, tgt_sents):\n        src_sent = ' '.join(src_sent)\n\n     
   tgt_len = len(tgt_sent)\n        tgt_samples = []\n\n        # generate 100 samples\n\n        # append itself\n        tgt_samples.append(tgt_sent)\n\n        for sid in range(args.sample_size - 1):\n            max_n = min(tgt_len - 1, 4)\n            bias_n = int(max_n * tgt_len / max_len) + 1\n            assert 1 <= bias_n <= 4, 'bias_n={}, not in [1,4], max_n={}, tgt_len={}, max_len={}'.format(bias_n, max_n, tgt_len, max_len)\n\n            p = [1.0/(max_n + 5)] * max_n\n            p[bias_n - 1] = 1 - p[0] * (max_n - 1)\n            assert abs(sum(p) - 1) < 1e-10, 'sum(p) != 1'\n\n            n = np.random.choice(np.arange(1, int(max_n + 1)), p=p)  # we do not replace the last token: it must be a period!\n            assert n < tgt_len, 'n={}, tgt_len={}'.format(n, tgt_len)\n\n            idx = np.random.randint(tgt_len - n)\n            ngram = tgt_sent[idx: idx+n]\n            new_ngram = get_new_ngram(ngram, n, tgt_vocab)\n\n            sampled_tgt_sent = list(tgt_sent)\n            sampled_tgt_sent[idx: idx+n] = new_ngram\n\n            tgt_samples.append(sampled_tgt_sent)\n\n        # compute bleu scores and rank the samples by bleu scores\n        bleu_scores = []\n        for tgt_sample in tgt_samples:\n            bleu_score = sentence_bleu([tgt_sent], tgt_sample)\n            bleu_scores.append(bleu_score)\n\n        tgt_ranks = sorted(range(len(tgt_samples)), key=lambda i: bleu_scores[i], reverse=True)\n        # convert list of tokens into a string\n        tgt_samples = [' '.join(tgt_sample) for tgt_sample in tgt_samples]\n\n        print('*' * 50, file=f_out)\n        print('source: ' + src_sent, file=f_out)\n        print('%d samples' % len(tgt_samples), file=f_out)\n        for i in tgt_ranks:\n            print('%s ||| %f' % (tgt_samples[i], bleu_scores[i]), file=f_out)\n        print('*' * 50, file=f_out)\n\n    f_out.close()\n\n\ndef sample_from_hamming_distance_payoff_distribution(args):\n    src_sents = read_corpus(args.src, 'src')\n   
 tgt_sents = read_corpus(args.tgt, 'src')  # do not read in <s> and </s>\n    f_out = open(args.output, 'w')\n\n    vocab = torch.load(args.vocab)\n    tgt_vocab = vocab.tgt\n\n    payoff_prob, Z_qs = generate_hamming_distance_payoff_distribution(max(len(sent) for sent in tgt_sents),\n                                                                      vocab_size=len(vocab.tgt),\n                                                                      tau=args.temp)\n\n    for src_sent, tgt_sent in zip(src_sents, tgt_sents):\n        tgt_samples = []  # make sure the ground truth y* is in the samples\n        tgt_sent_len = len(tgt_sent) - 3  # remove <s> and </s> and ending period .\n        tgt_ref_tokens = tgt_sent[1:-1]\n        bleu_scores = []\n\n        # sample an edit distances\n        e_samples = np.random.choice(range(tgt_sent_len + 1), p=payoff_prob[tgt_sent_len], size=args.sample_size,\n                                     replace=True)\n\n        for i, e in enumerate(e_samples):\n            if e > 0:\n                # sample a new tgt_sent $y$\n                old_word_pos = np.random.choice(range(1, tgt_sent_len + 1), size=e, replace=False)\n                new_words = [vocab.tgt.id2word[wid] for wid in np.random.randint(3, len(vocab.tgt), size=e)]\n                new_tgt_sent = list(tgt_sent)\n                for pos, word in zip(old_word_pos, new_words):\n                    new_tgt_sent[pos] = word\n\n                bleu_score = sentence_bleu([tgt_ref_tokens], new_tgt_sent[1:-1])\n                bleu_scores.append(bleu_score)\n            else:\n                new_tgt_sent = list(tgt_sent)\n                bleu_scores.append(1.)\n\n            # print('y: %s' % ' '.join(new_tgt_sent))\n            tgt_samples.append(new_tgt_sent)\n\n\ndef generate_hamming_distance_payoff_distribution(max_sent_len, vocab_size, tau=1.):\n    \"\"\"compute the q distribution for Hamming Distance (substitution only) as in the RAML paper\"\"\"\n    probs = 
dict()\n    Z_qs = dict()\n    for sent_len in range(1, max_sent_len + 1):\n        counts = [1.]  # e = 0, count = 1\n        for e in range(1, sent_len + 1):\n            # apply the rescaling trick as in https://gist.github.com/norouzi/8c4d244922fa052fa8ec18d8af52d366\n            count = comb(sent_len, e) * math.exp(-e / tau) * ((vocab_size - 1) ** (e - e / tau))\n            counts.append(count)\n\n        Z_qs[sent_len] = Z_q = sum(counts)\n        prob = [count / Z_q for count in counts]\n        probs[sent_len] = prob\n\n        # print('sent_len=%d, %s' % (sent_len, prob))\n\n    return probs, Z_qs\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--mode', choices=['sample_from_model', 'sample_ngram_adapt', 'sample_ngram'], required=True)\n    parser.add_argument('--vocab', type=str)\n    parser.add_argument('--src', type=str)\n    parser.add_argument('--tgt', type=str)\n    parser.add_argument('--parallel_data', type=str)\n    parser.add_argument('--sample_file', type=str)\n    parser.add_argument('--output', type=str, required=True)\n    parser.add_argument('--sample_size', type=int, default=100)\n    parser.add_argument('--reward', choices=['bleu', 'edit_dist', 'rouge'], default='bleu')\n    parser.add_argument('--max_ngram_size', type=int, default=4)\n    parser.add_argument('--temp', type=float, default=0.5)\n    parser.add_argument('--smooth_bleu', action='store_true', default=False)\n\n    args = parser.parse_args()\n\n    if args.mode == 'sample_ngram':\n        sample_ngram(args)\n    elif args.mode == 'sample_from_model':\n        sample_from_model(args)\n    elif args.mode == 'sample_ngram_adapt':\n        sample_ngram_adapt(args)\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/utils/raml_samples_generation/util.py",
    "content": "from collections import defaultdict\nimport numpy as np\n\ndef read_corpus(file_path, source):\n    data = []\n    for line in open(file_path):\n        sent = line.strip().split(' ')\n        # only append <s> and </s> to the target sentence\n        if source == 'tgt':\n            sent = ['<s>'] + sent + ['</s>']\n        data.append(sent)\n\n    return data\n\n\ndef batch_slice(data, batch_size, sort=True):\n    batch_num = int(np.ceil(len(data) / float(batch_size)))\n    for i in range(batch_num):\n        cur_batch_size = batch_size if i < batch_num - 1 else len(data) - batch_size * i\n        src_sents = [data[i * batch_size + b][0] for b in range(cur_batch_size)]\n        tgt_sents = [data[i * batch_size + b][1] for b in range(cur_batch_size)]\n\n        if sort:\n            src_ids = sorted(range(cur_batch_size), key=lambda src_id: len(src_sents[src_id]), reverse=True)\n            src_sents = [src_sents[src_id] for src_id in src_ids]\n            tgt_sents = [tgt_sents[src_id] for src_id in src_ids]\n\n        yield src_sents, tgt_sents\n\n\ndef data_iter(data, batch_size, shuffle=True):\n    \"\"\"\n    randomly permute data, then sort by source length, and partition into batches\n    ensure that the length of source sentences in each batch is decreasing\n    \"\"\"\n\n    buckets = defaultdict(list)\n    for pair in data:\n        src_sent = pair[0]\n        buckets[len(src_sent)].append(pair)\n\n    batched_data = []\n    for src_len in buckets:\n        tuples = buckets[src_len]\n        if shuffle: np.random.shuffle(tuples)\n        batched_data.extend(list(batch_slice(tuples, batch_size)))\n\n    if shuffle:\n        np.random.shuffle(batched_data)\n    for batch in batched_data:\n        yield batch\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_exposure_bias/utils/raml_samples_generation/vocab.py",
    "content": "from __future__ import print_function\nimport argparse\nfrom collections import Counter\nfrom itertools import chain\n\nimport torch\n\nfrom util import read_corpus\n\n\nclass VocabEntry(object):\n    def __init__(self):\n        self.word2id = dict()\n        self.unk_id = 3\n        self.word2id['<pad>'] = 0\n        self.word2id['<s>'] = 1\n        self.word2id['</s>'] = 2\n        self.word2id['<unk>'] = 3\n\n        self.id2word = {v: k for k, v in self.word2id.iteritems()}\n\n    def __getitem__(self, word):\n        return self.word2id.get(word, self.unk_id)\n\n    def __contains__(self, word):\n        return word in self.word2id\n\n    def __setitem__(self, key, value):\n        raise ValueError('vocabulary is readonly')\n\n    def __len__(self):\n        return len(self.word2id)\n\n    def __repr__(self):\n        return 'Vocabulary[size=%d]' % len(self)\n\n    def id2word(self, wid):\n        return self.id2word[wid]\n\n    def add(self, word):\n        if word not in self:\n            wid = self.word2id[word] = len(self)\n            self.id2word[wid] = word\n            return wid\n        else:\n            return self[word]\n\n    @staticmethod\n    def from_corpus(corpus, size, remove_singleton=True):\n        vocab_entry = VocabEntry()\n\n        word_freq = Counter(chain(*corpus))\n        non_singletons = [w for w in word_freq if word_freq[w] > 1]\n        print('number of word types: %d, number of word types w/ frequency > 1: %d' % (len(word_freq),\n                                                                                       len(non_singletons)))\n\n        top_k_words = sorted(word_freq.keys(), reverse=True, key=word_freq.get)[:size]\n\n        for word in top_k_words:\n            if len(vocab_entry) < size:\n                if not (word_freq[word] == 1 and remove_singleton):\n                    vocab_entry.add(word)\n\n        return vocab_entry\n\n\nclass Vocab(object):\n    def __init__(self, src_sents, 
tgt_sents, src_vocab_size, tgt_vocab_size, remove_singleton=True):\n        assert len(src_sents) == len(tgt_sents)\n\n        print('initialize source vocabulary ..')\n        self.src = VocabEntry.from_corpus(src_sents, src_vocab_size, remove_singleton=remove_singleton)\n\n        print('initialize target vocabulary ..')\n        self.tgt = VocabEntry.from_corpus(tgt_sents, tgt_vocab_size, remove_singleton=remove_singleton)\n\n    def __repr__(self):\n        return 'Vocab(source %d words, target %d words)' % (len(self.src), len(self.tgt))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--src_vocab_size', default=50000, type=int, help='source vocabulary size')\n    parser.add_argument('--tgt_vocab_size', default=50000, type=int, help='target vocabulary size')\n    parser.add_argument('--include_singleton', action='store_true', default=False, help='whether to include singleton'\n                                                                                        'in the vocabulary (default=False)')\n\n    parser.add_argument('--train_src', type=str, required=True, help='file of source sentences')\n    parser.add_argument('--train_tgt', type=str, required=True, help='file of target sentences')\n\n    parser.add_argument('--output', default='vocab.bin', type=str, help='output vocabulary file')\n\n    args = parser.parse_args()\n\n    print('read in source sentences: %s' % args.train_src)\n    print('read in target sentences: %s' % args.train_tgt)\n\n    src_sents = read_corpus(args.train_src, source='src')\n    tgt_sents = read_corpus(args.train_tgt, source='tgt')\n\n    vocab = Vocab(src_sents, tgt_sents, args.src_vocab_size, args.tgt_vocab_size, remove_singleton=not args.include_singleton)\n    print('generated vocabulary, source %d words, target %d words' % (len(vocab.src), len(vocab.tgt)))\n\n    torch.save(vocab, args.output)\n    print('vocabulary saved to %s' % args.output)"
  },
  {
    "path": "texar_repo/examples/seq2seq_rl/README.md",
    "content": "# Seq2seq Model with Policy Gradient Training #\n\nThis example builds an attentional seq2seq model that is trained with policy gradient and BLEU reward. The example is mainly for demonstration of the Texar sequence Reinforcement Learning APIs. No MLE pre-training is included so the model collapsed very quickly. In practice one would usually pretrain the model with teacher-forcing MLE (e.g., see the example [seq2seq_attn](../seq2seq_attn)) and continue to fine-tune with policy gradient. \n\nThe data and model configs are exact the same as the [MLE seq2seq example](../seq2seq_attn). The only difference is that MLE cross-entropy minimization is replaced with policy gradient training.\n\nThe example shows:\n  * Use of `texar.agents.SeqPGAgent` for policy gradient sequence generation.\n  * Use of the Python-based `texar.evals.sentence/corpus_bleu` for efficient reward computing, and the Moses `texar.evals.sentence/corpus_bleu_moses`\n    for standard test set evaluation.\n  * Use of `texar.data.FeedableDataIterator` for data feeding and resuming from breakpoint. \n\n## Usage ##\n\n### Dataset ###\n\nTwo example datasets are provided:\n\n  * toy_copy: A small toy autoencoding dataset from [TF Seq2seq toolkit](https://github.com/google/seq2seq/tree/2500c26add91b079ca00cf1f091db5a99ddab9ae).\n  * iwslt14: The benchmark [IWSLT2014](https://sites.google.com/site/iwsltevaluation2014/home) (de-en) machine translation dataset. \n\nDownload the data with the following cmds:\n\n```\npython prepare_data.py --data toy_copy\npython prepare_data.py --data iwslt14\n```\n\n### Train the model ###\n\nTrain the model with the following cmd:\n\n```\npython seq2seq_attn_pg.py --config_model config_model --config_data config_toy_copy\n```\n\nHere:\n  * `--config_model` specifies the model config. 
Note not to include the `.py` suffix.\n  * `--config_data` specifies the data config.\n\nAll configs are (mostly) the same as those in the [seq2seq_attn example](../seq2seq_attn).\n\n## Results ##\n\nThe code is for demonstrating the Texar API. With pure policy gradient and without MLE pretraining the model collapses very quickly. \n"
  },
  {
    "path": "texar_repo/examples/seq2seq_rl/config_iwslt14.py",
    "content": "\ndisplay = 100\ndisplay_eval = 5500\n\nsource_vocab_file = './data/iwslt14/vocab.de'\ntarget_vocab_file = './data/iwslt14/vocab.en'\n\ntrain = {\n    'num_epochs': 10,\n    'batch_size': 32,\n    'allow_smaller_final_batch': False,\n    'source_dataset': {\n        \"files\": 'data/iwslt14/train.de',\n        'vocab_file': source_vocab_file,\n        'max_seq_length': 50\n    },\n    'target_dataset': {\n        'files': 'data/iwslt14/train.en',\n        'vocab_file': target_vocab_file,\n        'max_seq_length': 50\n    }\n}\nval = {\n    'batch_size': 32,\n    'shuffle': False,\n    'source_dataset': {\n        \"files\": 'data/iwslt14/valid.de',\n        'vocab_file': source_vocab_file,\n    },\n    'target_dataset': {\n        'files': 'data/iwslt14/valid.en',\n        'vocab_file': target_vocab_file,\n    }\n}\ntest = {\n    'batch_size': 32,\n    'shuffle': False,\n    'source_dataset': {\n        \"files\": 'data/iwslt14/test.de',\n        'vocab_file': source_vocab_file,\n    },\n    'target_dataset': {\n        'files': 'data/iwslt14/test.en',\n        'vocab_file': target_vocab_file,\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_rl/config_model.py",
    "content": "# Attentional Seq2seq model.\n# Hyperparameters not specified here will take the default values.\n\nnum_units = 256\nbeam_width = 10\n\nembedder = {\n    'dim': num_units\n}\nencoder = {\n    'rnn_cell_fw': {\n        'kwargs': {\n            'num_units': num_units\n        }\n    }\n}\ndecoder = {\n    'rnn_cell': {\n        'kwargs': {\n            'num_units': num_units\n        },\n    },\n    'attention': {\n        'kwargs': {\n            'num_units': num_units,\n        },\n        'attention_layer_size': num_units\n    }\n}\nagent = {\n    'discount_factor': 0.,\n    'entropy_weight': .5\n}\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_rl/config_toy_copy.py",
    "content": "\ndisplay = 10\ndisplay_eval = 300\n\nsource_vocab_file = './data/toy_copy/train/vocab.sources.txt'\ntarget_vocab_file = './data/toy_copy/train/vocab.targets.txt'\n\ntrain = {\n    'num_epochs': 10,\n    'batch_size': 32,\n    'allow_smaller_final_batch': False,\n    'source_dataset': {\n        \"files\": './data/toy_copy/train/sources.txt',\n        'vocab_file': source_vocab_file\n    },\n    'target_dataset': {\n        'files': './data/toy_copy/train/targets.txt',\n        'vocab_file': target_vocab_file\n    }\n}\nval = {\n    'batch_size': 32,\n    'allow_smaller_final_batch': False,\n    'source_dataset': {\n        \"files\": './data/toy_copy/dev/sources.txt',\n        'vocab_file': source_vocab_file\n    },\n    'target_dataset': {\n        \"files\": './data/toy_copy/dev/targets.txt',\n        'vocab_file': target_vocab_file\n    }\n}\ntest = {\n    'batch_size': 32,\n    'allow_smaller_final_batch': False,\n    'source_dataset': {\n        \"files\": './data/toy_copy/test/sources.txt',\n        'vocab_file': source_vocab_file\n    },\n    'target_dataset': {\n        \"files\": './data/toy_copy/test/targets.txt',\n        'vocab_file': target_vocab_file\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_rl/prepare_data.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Downloads data.\n\"\"\"\nimport tensorflow as tf\nimport texar as tx\n\n# pylint: disable=invalid-name\n\nflags = tf.flags\n\nflags.DEFINE_string(\"data\", \"iwslt14\", \"Data to download [iwslt14|toy_copy]\")\n\nFLAGS = flags.FLAGS\n\ndef prepare_data():\n    \"\"\"Downloads data.\n    \"\"\"\n    if FLAGS.data == 'iwslt14':\n        tx.data.maybe_download(\n            urls='https://drive.google.com/file/d/'\n                 '1Vuv3bed10qUxrpldHdYoiWLzPKa4pNXd/view?usp=sharing',\n            path='./',\n            filenames='iwslt14.zip',\n            extract=True)\n    elif FLAGS.data == 'toy_copy':\n        tx.data.maybe_download(\n            urls='https://drive.google.com/file/d/'\n                 '1fENE2rakm8vJ8d3voWBgW4hGlS6-KORW/view?usp=sharing',\n            path='./',\n            filenames='toy_copy.zip',\n            extract=True)\n    else:\n        raise ValueError('Unknown data: {}'.format(FLAGS.data))\n\ndef main():\n    \"\"\"Entrypoint.\n    \"\"\"\n    prepare_data()\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "texar_repo/examples/seq2seq_rl/seq2seq_attn_pg.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Attentional Seq2seq trained with policy gradient.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\n#pylint: disable=invalid-name, too-many-arguments, too-many-locals\n\nimport importlib\nimport numpy as np\nimport tensorflow as tf\nimport texar as tx\n\nflags = tf.flags\n\nflags.DEFINE_string(\"config_model\", \"config_model\", \"The model config.\")\nflags.DEFINE_string(\"config_data\", \"config_iwslt14\", \"The dataset config.\")\n\nFLAGS = flags.FLAGS\n\nconfig_model = importlib.import_module(FLAGS.config_model)\nconfig_data = importlib.import_module(FLAGS.config_data)\n\n# A caveats of using `texar.agents.SeqPGAgent`:\n# The training data iterator should not run to raise `OutOfRangeError`,\n# otherwise the iterator cannot be re-initialized and may raise\n# `CancelledError`. This is probably because the iterator is used by\n# `tf.Session.partial_run` in `SeqPGAgent`.\n#\n# A simple workaround is to set `'num_epochs'` of training data to a large\n# number so that its iterator will never run into `OutOfRangeError`. 
Use\n# `texar.data.FeedableDataIterator` to periodically switch to dev/test data\n# for evaluation and switch back to the training data to resume from the\n# breakpoint.\n\ndef build_model(batch, train_data):\n    \"\"\"Assembles the seq2seq model.\n    \"\"\"\n    source_embedder = tx.modules.WordEmbedder(\n        vocab_size=train_data.source_vocab.size, hparams=config_model.embedder)\n\n    encoder = tx.modules.BidirectionalRNNEncoder(\n        hparams=config_model.encoder)\n\n    enc_outputs, _ = encoder(source_embedder(batch['source_text_ids']))\n\n    target_embedder = tx.modules.WordEmbedder(\n        vocab_size=train_data.target_vocab.size, hparams=config_model.embedder)\n\n    decoder = tx.modules.AttentionRNNDecoder(\n        memory=tf.concat(enc_outputs, axis=2),\n        memory_sequence_length=batch['source_length'],\n        vocab_size=train_data.target_vocab.size,\n        hparams=config_model.decoder)\n\n    start_tokens = tf.ones_like(batch['target_length']) * \\\n            train_data.target_vocab.bos_token_id\n\n    outputs, _, sequence_length = decoder(\n        decoding_strategy='infer_sample',\n        start_tokens=start_tokens,\n        end_token=train_data.target_vocab.eos_token_id,\n        embedding=target_embedder,\n        max_decoding_length=30)\n\n    beam_search_outputs, _, _ = \\\n        tx.modules.beam_search_decode(\n            decoder_or_cell=decoder,\n            embedding=target_embedder,\n            start_tokens=start_tokens,\n            end_token=train_data.target_vocab.eos_token_id,\n            beam_width=config_model.beam_width,\n            max_decoding_length=60)\n\n    return outputs, sequence_length, beam_search_outputs\n\n\ndef main():\n    \"\"\"Entrypoint.\n    \"\"\"\n    train_data = tx.data.PairedTextData(hparams=config_data.train)\n    val_data = tx.data.PairedTextData(hparams=config_data.val)\n    test_data = tx.data.PairedTextData(hparams=config_data.test)\n    iterator = tx.data.FeedableDataIterator(\n     
   {'train': train_data, 'val': val_data, 'test': test_data})\n\n    batch = iterator.get_next()\n\n    outputs, sequence_length, infer_outputs = build_model(batch, train_data)\n\n    agent = tx.agents.SeqPGAgent(\n        samples=outputs.sample_id,\n        logits=outputs.logits,\n        sequence_length=sequence_length,\n        hparams=config_model.agent)\n\n    def _train_and_eval(sess, agent):\n        iterator.restart_dataset(sess, 'train')\n\n        best_val_bleu = -1.\n        step = 0\n        while True:\n            try:\n                # Samples\n                extra_fetches = {\n                    'truth': batch['target_text_ids'],\n                }\n                feed_dict = {\n                    iterator.handle: iterator.get_handle(sess, 'train')\n                }\n                fetches = agent.get_samples(\n                    extra_fetches=extra_fetches, feed_dict=feed_dict)\n\n                sample_text = tx.utils.map_ids_to_strs(\n                    fetches['samples'], train_data.target_vocab,\n                    strip_eos=False, join=False)\n                truth_text = tx.utils.map_ids_to_strs(\n                    fetches['truth'], train_data.target_vocab,\n                    strip_eos=False, join=False)\n\n                # Computes rewards\n                reward = []\n                for ref, hyp in zip(truth_text, sample_text):\n                    r = tx.evals.sentence_bleu([ref], hyp, smooth=True)\n                    reward.append(r)\n\n                # Updates\n                loss = agent.observe(reward=reward)\n\n                # Displays & evaluates\n                step += 1\n                if step == 1 or step % config_data.display == 0:\n                    print(\"step={}, loss={:.4f}, reward={:.4f}\".format(\n                        step, loss, np.mean(reward)))\n\n                if step % config_data.display_eval == 0:\n                    val_bleu = _eval_epoch(sess, 'val')\n                    
best_val_bleu = max(best_val_bleu, val_bleu)\n                    print('val step={}, BLEU={:.4f}; best-ever={:.4f}'.format(\n                        step, val_bleu, best_val_bleu))\n\n                    test_bleu = _eval_epoch(sess, 'test')\n                    print('test step={}, BLEU={:.4f}'.format(step, test_bleu))\n                    print('=' * 50)\n\n            except tf.errors.OutOfRangeError:\n                break\n\n    def _eval_epoch(sess, mode):\n        \"\"\"`mode` is one of {'val', 'test'}\n        \"\"\"\n        iterator.restart_dataset(sess, mode)\n\n        refs, hypos = [], []\n        while True:\n            try:\n                fetches = [\n                    batch['target_text'][:, 1:],\n                    infer_outputs.predicted_ids[:, :, 0]\n                ]\n                feed_dict = {\n                    tx.global_mode(): tf.estimator.ModeKeys.PREDICT,\n                    iterator.handle: iterator.get_handle(sess, mode)\n                }\n                target_texts, output_ids = \\\n                    sess.run(fetches, feed_dict=feed_dict)\n\n                target_texts = tx.utils.strip_special_tokens(target_texts)\n                output_texts = tx.utils.map_ids_to_strs(\n                    ids=output_ids, vocab=val_data.target_vocab)\n\n                for hypo, ref in zip(output_texts, target_texts):\n                    hypos.append(hypo)\n                    refs.append([ref])\n            except tf.errors.OutOfRangeError:\n                break\n\n        return tx.evals.corpus_bleu_moses(list_of_references=refs,\n                                          hypotheses=hypos)\n\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        agent.sess = sess\n\n        _train_and_eval(sess, agent)\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "texar_repo/examples/seqgan/README.md",
    "content": "# SeqGAN for Text Generation\n\nThis example is an implementation of [(Yu et al.) SeqGAN: Sequence Generative Adversarial Nets with Policy Gradient](https://arxiv.org/pdf/1609.05473.pdf), with a language model as the generator and an RNN-based classifier as the discriminator.\n\nModel architecture and parameter settings are in line with the [official implementation](https://github.com/geek-ai/Texygen) of SeqGAN, except that we replace the MC-Tree rollout strategy with token-level reward by the RNN discriminator, which is simpler and provides competitive performance.\n\nExperiments are performed on two datasets:\n* The [PTB dataset](https://corochann.com/penn-tree-bank-ptb-dataset-introduction-1456.html) standard for language modeling\n* The [COCO Captions dataset](http://cocodataset.org/#download): with 2K vocabularies and an average sentence length of 25. We use the [data](https://github.com/geek-ai/Texygen/tree/master/data) provided in the official implementation, where train/test datasets contain 10K sentences, respectively.\n\n## Usage\n\n### Dataset\nDownload datasets with the following cmds respectively:\n```shell\npython data_utils.py --config config_ptb_small --data_path ./ --dataset ptb\npython data_utils.py --config config_coco --data_path ./ --dataset coco\n```\n\nHere:\n* `--config` specifies config parameters to use. Default is `config_ptb_small`.\n* `--data_path` is the directory to store the downloaded dataset. Default is `./`.\n* `--dataset` indicates the training dataset. 
Currently `ptb`(default) and `coco` are supported.\n\n### Train the model\n\nTraining on `coco` dataset can be performed with the following command:\n\n```shell\npython seqgan_train.py --config config_coco --data_path ./ --dataset coco\n```\n\nHere:\n\n`--config`, `--data_path` and `--dataset` should be the same with the flags settings used to download the dataset.\n\nThe model will start training and will evaluate perplexity and BLEU score every 10 epochs.\n\n## Results\n\n### COCO Caption\n\nWe compare the results of SeqGAN and MLE (maximum likelihood training) provided by our and official implementations, using the default official parameter settings. Each cell below presents the BLEU scores on both the test set and the training set (in the parentheses). \n\nWe use the standard BLEU function [`texar.evals.sentence_bleu_moses`](https://texar.readthedocs.io/en/latest/code/evals.html#sentence-bleu-moses) to evaluate BLEU scores for both the official and our implementations.\n\n|    |Texar - SeqGAN   | Official - SeqGAN | Texar - MLE | Official - MLE |\n|---------------|-------------|----------------|-------------|----------------|\n|BLEU-1 | 0.5670 (0.6850) | 0.6260 (0.7900) | 0.7130 (0.9360) | 0.6620 (0.8770) |\n|BLEU-2 | 0.3490 (0.5330) | 0.3570 (0.5880) | 0.4510 (0.7590) | 0.3780 (0.6910) |\n|BLEU-3 | 0.1940 (0.3480) | 0.1660 (0.3590) | 0.2490 (0.4990) | 0.1790 (0.4470) |\n|BLEU-4 | 0.0940 (0.1890) | 0.0710 (0.1800) | 0.1170 (0.2680) | 0.0790 (0.2390)|\n\n### PTB\n\nOn PTB data, we use three different hyperparameter configurations which result in models of different sizes.\nThe perplexity on both the test set and the training set are listed in the following table.\n\n|config|train   |Official - train |test    |  Official - test |\n|---   |---     |---              |---     |---               |\n|small |28.4790 |53.2289          |58.9798 | 55.7736          |\n|medium|16.3243 |9.8919           |37.6558 | 20.8537          |\n|large |14.5739 |4.7015           
|52.0850 | 39.7949          |\n\n## Training Log\n\nDuring training, loss and BLEU score are recorded in the log directory. Here, we provide sample log output when training on the  `coco` dataset.\n\n### Training loss\nTraining loss will be recorded in coco_log/log.txt.\n```text\nG pretrain epoch   0, step   1: train_ppl: 1781.854030\nG pretrain epoch   1, step 201: train_ppl: 10.483647\nG pretrain epoch   2, step 401: train_ppl: 7.335757\n...\nG pretrain epoch  77, step 12201: train_ppl: 3.372638\nG pretrain epoch  78, step 12401: train_ppl: 3.534658\nD pretrain epoch   0, step   0: dis_total_loss: 27.025223, r_loss: 13.822192, f_loss: 13.203032\nD pretrain epoch   1, step   0: dis_total_loss: 26.331108, r_loss: 13.592842, f_loss: 12.738266\nD pretrain epoch   2, step   0: dis_total_loss: 27.042515, r_loss: 13.592712, f_loss: 13.449802\n...\nD pretrain epoch  77, step   0: dis_total_loss: 25.134272, r_loss: 12.660420, f_loss: 12.473851\nD pretrain epoch  78, step   0: dis_total_loss: 23.727032, r_loss: 12.822734, f_loss: 10.904298\nD pretrain epoch  79, step   0: dis_total_loss: 24.769077, r_loss: 12.733292, f_loss: 12.035786\nG train  epoch  80, step 12601: mean_reward: 0.027631, expect_reward_loss:-0.256241, update_loss: -20.670971\nD train  epoch  80, step   0: dis_total_loss: 25.222481, r_loss: 12.671371, f_loss: 12.551109\nD train  epoch  81, step   0: dis_total_loss: 25.695383, r_loss: 13.037079, f_loss: 12.658304\n...\nG train  epoch 178, step 22401: mean_reward: 3.409714, expect_reward_loss:-3.474687, update_loss: 733.247009\nD train  epoch 178, step   0: dis_total_loss: 24.715553, r_loss: 13.181369, f_loss: 11.534184\nD train  epoch 179, step   0: dis_total_loss: 24.572170, r_loss: 13.176209, f_loss: 11.395961\n```\n\n### BLEU\nBLEU1~BLEU4 scores will be calculated every 10 epochs, the results are written to log_dir/bleu.txt.\n```text\n...\nepoch 170 BLEU1~4 on train dataset:\n0.726647\n0.530675\n0.299362\n0.133602\n\n epoch 170 BLEU1~4 on test 
dataset:\n0.548151\n0.283765\n0.118528\n0.042177\n...\n```\n\n"
  },
  {
    "path": "texar_repo/examples/seqgan/config_coco.py",
    "content": "generator_pretrain_epoch = 80\ndiscriminator_pretrain_epoch = 80\nadversial_epoch = 100\n\nhidden_size = 32\nbatch_size = 64\nmax_num_steps = 20\n\nenc_keep_prob_in = 1.0\ndec_keep_prob_out = 1.0\n\nlog_dir = './coco_log/'\nlog_file = log_dir + 'log.txt'\nbleu_file = log_dir + 'bleu.txt'\nckpt = './checkpoint/ckpt'\n\ndec_cell_hparams = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": dec_keep_prob_out},\n    \"num_layers\": 1\n}\n\nemb_hparams = {\n    'name': 'lookup_table',\n    \"dim\": hidden_size,\n    'initializer': {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': hidden_size**-0.5,\n        },\n    }\n}\n\ntrain_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": 'coco_data/coco.train.txt',\n        \"vocab_file\": 'coco_data/vocab.txt',\n        \"max_seq_length\": max_num_steps\n    }\n}\n\nval_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": 'coco_data/coco.valid.txt',\n        \"vocab_file\": 'coco_data/vocab.txt',\n        \"max_seq_length\": max_num_steps\n    }\n}\n\ntest_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"dataset\": {\n        \"files\": 'coco_data/coco.test.txt',\n        \"vocab_file\": 'coco_data/vocab.txt',\n        \"max_seq_length\": max_num_steps\n    }\n}\n\ng_opt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            \"learning_rate\": 0.01\n        }\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 5.}\n    }\n}\n\nd_opt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": 
{\n            \"learning_rate\": 0.0001\n        }\n    }\n}\n\nupdate_opt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            \"learning_rate\": 0.0004\n        }\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/seqgan/config_ptb_large.py",
    "content": "generator_pretrain_epoch = 55\ndiscriminator_pretrain_epoch = 15\nadversial_epoch = 20\n\nhidden_size = 1500\nbatch_size = 64\nmax_num_steps = 35\n\nenc_keep_prob_in = 1.0\ndec_keep_prob_out = 0.35\n\nlog_dir = './ptb_log.large/'\nlog_file = log_dir + 'log.txt'\nbleu_file = log_dir + 'bleu.txt'\nckpt = './checkpoint/ckpt'\n\ndec_cell_hparams = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": dec_keep_prob_out},\n    \"num_layers\": 2\n}\n\nemb_hparams = {\n    'name': 'lookup_table',\n    \"dim\": hidden_size,\n    'initializer': {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': hidden_size**-0.5,\n        },\n    }\n}\n\ntrain_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": 'ptb_data/ptb.train.txt',\n        \"vocab_file\": 'ptb_data/vocab.txt',\n        \"max_seq_length\": max_num_steps\n    }\n}\n\nval_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": 'ptb_data/ptb.valid.txt',\n        \"vocab_file\": 'ptb_data/vocab.txt',\n        \"max_seq_length\": max_num_steps\n    }\n}\n\ntest_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"dataset\": {\n        \"files\": 'ptb_data/ptb.test.txt',\n        \"vocab_file\": 'ptb_data/vocab.txt',\n        \"max_seq_length\": max_num_steps\n    }\n}\n\ng_opt_hparams = {\n    \"optimizer\": {\n        \"type\": \"GradientDescentOptimizer\",\n        \"kwargs\": {\"learning_rate\": 1.0}\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 10.}\n    }\n}\n\nd_opt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            
\"learning_rate\": 0.0001\n        }\n    }\n}\n\nupdate_opt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            \"learning_rate\": 0.0004\n        }\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/seqgan/config_ptb_medium.py",
    "content": "generator_pretrain_epoch = 39\ndiscriminator_pretrain_epoch = 15\nadversial_epoch = 20\n\nhidden_size = 650\nbatch_size = 64\nmax_num_steps = 35\n\nenc_keep_prob_in = 1.0\ndec_keep_prob_out = 0.5\n\nlog_dir = './ptb_log.medium/'\nlog_file = log_dir + 'log.txt'\nbleu_file = log_dir + 'bleu.txt'\nckpt = './checkpoint/ckpt'\n\ndec_cell_hparams = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": dec_keep_prob_out},\n    \"num_layers\": 2\n}\n\nemb_hparams = {\n    'name': 'lookup_table',\n    \"dim\": hidden_size,\n    'initializer': {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': hidden_size**-0.5,\n        },\n    }\n}\n\ntrain_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": 'ptb_data/ptb.train.txt',\n        \"vocab_file\": 'ptb_data/vocab.txt',\n        \"max_seq_length\": max_num_steps\n    }\n}\n\nval_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": 'ptb_data/ptb.valid.txt',\n        \"vocab_file\": 'ptb_data/vocab.txt',\n        \"max_seq_length\": max_num_steps\n    }\n}\n\ntest_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"dataset\": {\n        \"files\": 'ptb_data/ptb.test.txt',\n        \"vocab_file\": 'ptb_data/vocab.txt',\n        \"max_seq_length\": max_num_steps\n    }\n}\n\ng_opt_hparams = {\n    \"optimizer\": {\n        \"type\": \"GradientDescentOptimizer\",\n        \"kwargs\": {\"learning_rate\": 1.0}\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 5.}\n    }\n}\n\nd_opt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            
\"learning_rate\": 0.0001\n        }\n    }\n}\n\nupdate_opt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            \"learning_rate\": 0.0004\n        }\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/seqgan/config_ptb_small.py",
    "content": "generator_pretrain_epoch = 13\ndiscriminator_pretrain_epoch = 15\nadversial_epoch = 10\n\nhidden_size = 200\nbatch_size = 64\nmax_num_steps = 20\n\nenc_keep_prob_in = 1.0\ndec_keep_prob_out = 1.0\n\nlog_dir = './ptb_log.small/'\nlog_file = log_dir + 'log.txt'\nbleu_file = log_dir + 'bleu.txt'\nckpt = './checkpoint/ckpt'\n\ndec_cell_hparams = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": dec_keep_prob_out},\n    \"num_layers\": 2\n}\n\nemb_hparams = {\n    'name': 'lookup_table',\n    \"dim\": hidden_size,\n    'initializer': {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': hidden_size**-0.5,\n        },\n    }\n}\n\ntrain_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": 'ptb_data/ptb.train.txt',\n        \"vocab_file\": 'ptb_data/vocab.txt',\n        \"max_seq_length\": max_num_steps\n    }\n}\n\nval_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": 'ptb_data/ptb.valid.txt',\n        \"vocab_file\": 'ptb_data/vocab.txt',\n        \"max_seq_length\": max_num_steps\n    }\n}\n\ntest_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"dataset\": {\n        \"files\": 'ptb_data/ptb.test.txt',\n        \"vocab_file\": 'ptb_data/vocab.txt',\n        \"max_seq_length\": max_num_steps\n    }\n}\n\ng_opt_hparams = {\n    \"optimizer\": {\n        \"type\": \"GradientDescentOptimizer\",\n        \"kwargs\": {\"learning_rate\": 1.0}\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 5.}\n    }\n}\n\nd_opt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            
\"learning_rate\": 0.0001\n        }\n    }\n}\n\nupdate_opt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            \"learning_rate\": 0.0004\n        }\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/seqgan/data_utils.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"SeqGAN for language modeling\n\"\"\"\nimport os\nimport argparse\nimport importlib\nimport tensorflow as tf\nimport texar as tx\n\nparser = argparse.ArgumentParser(description='prepare data')\nparser.add_argument('--dataset', type=str, default='ptb',\n                    help='dataset to prepare')\nparser.add_argument('--data_path', type=str, default='./',\n                    help=\"Directory containing coco. 
If not exists, \"\n                    \"the directory will be created, and the data \"\n                    \"will be downloaded.\")\nparser.add_argument('--config', type=str, default='config_ptb_small',\n                    help='The config to use.')\nargs = parser.parse_args()\n\nconfig = importlib.import_module(args.config)\n\n\ndef prepare_data(args, config, train_path):\n    \"\"\"Downloads the PTB or COCO dataset\n    \"\"\"\n    if not os.path.exists(config.log_dir):\n        os.mkdir(config.log_dir)\n\n    ptb_url = 'https://jxhe.github.io/download/ptb_data.tgz'\n    coco_url = 'https://VegB.github.io/downloads/coco_data.tgz'\n\n    data_path = args.data_path\n\n    if not tf.gfile.Exists(train_path):\n        url = ptb_url if args.dataset == 'ptb' else coco_url\n        tx.data.maybe_download(url, data_path, extract=True)\n        os.remove('%s_data.tgz' % args.dataset)\n\n\nif __name__ == '__main__':\n    prepare_data(args, config, config.train_data_hparams['dataset']['files'])\n"
  },
  {
    "path": "texar_repo/examples/seqgan/seqgan_train.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"SeqGAN for language modeling\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name, no-member, too-many-locals\n\nimport importlib\nimport numpy as np\nimport tensorflow as tf\nimport texar as tx\n\nflags = tf.flags\nflags.DEFINE_string(\"dataset\", \"ptb\",\n                    \"perform training on ptb or coco.\")\nflags.DEFINE_string(\"data_path\", \"./\",\n                    \"Directory containing coco. 
If not exists, \"\n                    \"the directory will be created, and the data \"\n                    \"will be downloaded.\")\nflags.DEFINE_string(\"config\", \"config_ptb_small\", \"The config to use.\")\nFLAGS = flags.FLAGS\n\nconfig = importlib.import_module(FLAGS.config)\n\n\ndef _main(_):\n    log = open(config.log_file, 'w')\n    bleu_log = open(config.bleu_file, 'w')\n\n    # Data\n    train_data = tx.data.MonoTextData(config.train_data_hparams)\n    val_data = tx.data.MonoTextData(config.val_data_hparams)\n    test_data = tx.data.MonoTextData(config.test_data_hparams)\n    iterator = tx.data.TrainTestDataIterator(train=train_data,\n                                             val=val_data,\n                                             test=test_data)\n    data_batch = iterator.get_next()\n\n    batch_size = tf.shape(data_batch[\"text_ids\"])[0]\n    num_steps = tf.shape(data_batch[\"text_ids\"])[1]\n    vocab_size = train_data.vocab.size\n\n    # Model architecture\n    g_embedder = tx.modules.WordEmbedder(vocab_size=vocab_size,\n                                         hparams=config.emb_hparams)\n    input_embed = g_embedder(data_batch[\"text_ids\"][:, :-1])\n\n    if config.enc_keep_prob_in < 1:\n        input_embed = tf.nn.dropout(\n            input_embed, tx.utils.switch_dropout(config.enc_keep_prob_in))\n\n    decoder = tx.modules.BasicRNNDecoder(\n        vocab_size=vocab_size,\n        hparams={\"rnn_cell\": config.dec_cell_hparams,\n                 \"max_decoding_length_infer\": config.max_num_steps + 2})\n    initial_state = decoder.zero_state(batch_size=batch_size,\n                                       dtype=tf.float32)\n    g_variables = tx.utils.collect_trainable_variables([g_embedder, decoder])\n\n    # ------------Pretrain Generator---------------\n    outputs, _, _ = decoder(\n        initial_state=initial_state,\n        decoding_strategy=\"train_greedy\",\n        inputs=input_embed,\n        
sequence_length=data_batch[\"length\"] - 1)\n\n    mle_loss = tx.losses.sequence_sparse_softmax_cross_entropy(\n        labels=data_batch[\"text_ids\"][:, 1:],\n        logits=outputs.logits,\n        sequence_length=data_batch[\"length\"] - 1)\n\n    global_step = tf.Variable(0, trainable=False)\n    gen_train_op = tx.core.get_train_op(mle_loss,\n                                        variables=g_variables,\n                                        global_step=global_step,\n                                        increment_global_step=True,\n                                        hparams=config.g_opt_hparams)\n\n    # -------------Generator Infer-------------------\n    start_tokens = tf.cast(tf.fill([batch_size],\n                                   train_data.vocab.bos_token_id),\n                           dtype=tf.int32)\n    infer_outputs, _, sequence_length = decoder(\n        decoding_strategy=\"infer_sample\",\n        start_tokens=start_tokens,\n        end_token=train_data.vocab.eos_token_id,\n        embedding=g_embedder,\n        initial_state=initial_state,\n        max_decoding_length=config.max_num_steps)\n\n    infer_logits = infer_outputs.logits\n    infer_sample_ids = infer_outputs.sample_id\n\n    # ------------Pretrain Discriminator---------------\n    discriminator = tx.modules.UnidirectionalRNNClassifier(\n        hparams={\"clas_strategy\": \"time_wise\", \"num_classes\": 1})\n    d_embedder = tx.modules.WordEmbedder(vocab_size=vocab_size,\n                                         hparams=config.emb_hparams)\n    d_variables = tx.utils.collect_trainable_variables([discriminator, d_embedder])\n\n    r_logits, _ = discriminator(d_embedder(data_batch[\"text_ids\"][:, 1:]),\n                                sequence_length=data_batch[\"length\"] - 1)\n    f_logits, _ = discriminator(d_embedder(infer_sample_ids), sequence_length=sequence_length)\n\n    r_loss = tx.losses.sequence_sigmoid_cross_entropy(\n        
labels=tf.ones_like(data_batch[\"text_ids\"][:, 1:], dtype=tf.float32),\n        logits=tf.squeeze(r_logits),\n        sequence_length=data_batch[\"length\"] - 1)  # r_preds -> 1.\n    f_loss = tx.losses.sequence_sigmoid_cross_entropy(\n        labels=tf.zeros_like(infer_sample_ids, dtype=tf.float32),\n        logits=tf.squeeze(f_logits),\n        sequence_length=sequence_length)  # infer_logits -> 0.\n    dis_loss = r_loss + f_loss\n    dis_loss.set_shape(())\n\n    dis_train_op = tx.core.get_train_op(dis_loss,\n                                        variables=d_variables,\n                                        global_step=global_step,\n                                        increment_global_step=False,\n                                        hparams=config.d_opt_hparams)\n\n    # ------------Adeversarial---------------\n    infer_logits = tf.clip_by_value(\n        tf.nn.softmax(infer_logits) *\n        tf.one_hot(infer_sample_ids, vocab_size), 1e-20, 1)\n\n    expected_reward = tf.Variable(tf.zeros((config.max_num_steps,)))\n    reward = tf.reshape(f_logits, shape=(batch_size, -1)) - \\\n            expected_reward[:tf.shape(f_logits)[1]]\n    mean_reward = tf.reduce_mean(reward)\n    exp_reward_loss = -tf.reduce_mean(tf.abs(reward))\n    exp_reward_loss.set_shape(())\n    exp_op = tx.core.get_train_op(exp_reward_loss,\n                                  variables=[expected_reward],\n                                  global_step=global_step,\n                                  increment_global_step=False,\n                                  hparams=config.update_opt_hparams)\n    reward = tx.losses.discount_reward(\n        reward, sequence_length=tf.squeeze(sequence_length), tensor_rank=2)\n    update_loss = -tf.reduce_mean(tf.log(infer_logits) *\n                                  tf.expand_dims(reward, -1))\n    update_loss.set_shape(())\n    gen_op = tx.core.get_train_op(update_loss,\n                                  variables=g_variables,\n                
                  global_step=global_step,\n                                  increment_global_step=True,\n                                  hparams=config.update_opt_hparams)\n    update_op = tf.group(gen_op, exp_op)\n\n    def _g_train_epoch(sess, epoch, mode_string):\n        iterator.switch_to_train_data(sess)\n        while True:\n            try:\n                if mode_string == 'train':\n                    fetches = {\n                        'mean_rwd': mean_reward,\n                        'exp_rwd_loss': exp_reward_loss,\n                        'update_loss': update_loss,\n                        'update_op': update_op,\n                        'exp_rwd': expected_reward,\n                        'step': global_step\n                    }\n                elif mode_string == 'pretrain':\n                    fetches = {\n                        'mle_loss': mle_loss,\n                        'num_steps': num_steps,\n                        'train_op': gen_train_op,\n                        'step': global_step\n                    }\n                else:\n                    raise ValueError(\n                        \"Expect mode_string to be one of \"\n                        \"['pretrain', 'train'], got %s\" % mode_string)\n                rtns = sess.run(fetches)\n                step = rtns['step']\n                if step % 200 == 1:\n                    if mode_string == 'pretrain':\n                        ppl = np.exp(rtns['mle_loss'] / rtns[\"num_steps\"])\n                        rst = \"G {0:6s} epoch {1:3d}, step {2:3d}:\" \\\n                              \" train_ppl: {3:6f}\".format(mode_string,\n                                                          epoch, step, ppl)\n                    else:\n                        rst = \"G {0:6s} epoch {1:3d}, step {2:3d}: \" \\\n                              \"mean_reward: {3:6f}, \" \\\n                              \"expect_reward_loss:{4:6f}, \" \\\n                              
\"update_loss: {5:6f}\".format(\n                                  mode_string, epoch, step, rtns['mean_rwd'],\n                                  rtns['exp_rwd_loss'], rtns['update_loss'])\n                    log.write(rst + '\\n')\n                    log.flush()\n                    print(rst)\n                    if mode_string == 'train':  # a batch per adversarial epoch\n                        break\n            except tf.errors.OutOfRangeError:\n                break\n        return\n\n    def _g_test_epoch(sess, epoch, mode_string):\n        def _id2word_map(id_arrays):\n            return [' '.join([train_data.vocab.id_to_token_map_py[i]\n                              for i in sent]) for sent in id_arrays]\n\n        if mode_string == 'valid':\n            iterator.switch_to_val_data(sess)\n        elif mode_string == 'test':\n            iterator.switch_to_test_data(sess)\n        else:\n            raise ValueError(\"Expect mode_string to be one of \"\n                             \"['valid', 'test'], got %s\" % mode_string)\n\n        target_list, inference_list = [], []\n        loss, steps = 0., 0\n        while True:\n            try:\n                fetches = {\n                    \"mle_loss\": mle_loss,\n                    \"num_steps\": num_steps\n                }\n                if mode_string == 'test':\n                    fetches['target_sample_id'] = data_batch[\"text_ids\"]\n                    fetches['infer_sample_id'] = infer_sample_ids\n\n                feed_dict = {tx.global_mode(): tf.estimator.ModeKeys.EVAL}\n\n                rtns = sess.run(fetches, feed_dict)\n\n                loss += rtns['mle_loss']\n                steps += rtns['num_steps']\n\n                if mode_string == 'test':\n                    targets = _id2word_map(rtns['target_sample_id'][:, 1:].tolist())  # remove <BOS>\n                    for t in targets:\n                        target_list.extend(t.split('<EOS>')[0].strip().split())\n\n               
     inferences = _id2word_map(rtns['infer_sample_id'].tolist())\n                    for inf in inferences:\n                        inference_list.extend(inf.split('<EOS>')[0].strip().split())\n\n            except tf.errors.OutOfRangeError:\n                break\n\n        ppl = np.exp(loss / steps)\n        rst = \"G {0:6s} epoch {1:3d}, step {2:3s}:\" \\\n              \" {3:5s}_ppl: {4:6f}\"\\\n            .format(mode_string, epoch, '-', mode_string, ppl)\n        log.write(rst + '\\n')\n        log.flush()\n        print(rst)\n\n        if mode_string == 'test':\n            bleu_test = tx.evals.sentence_bleu_moses(\n                references=[target_list],\n                hypothesis=inference_list,\n                lowercase=True, return_all=True)\n            if not isinstance(bleu_test, np.ndarray):  # might return 0.0 if inference_list is null\n                bleu_test = [bleu_test] * 5\n            rst_test = \"epoch %d BLEU1~4 on test dataset:\\n\" \\\n                       \"%f\\n%f\\n%f\\n%f\\n\\n\" % \\\n                       (epoch, bleu_test[1], bleu_test[2],\n                        bleu_test[3], bleu_test[4])\n            print(rst_test)\n            bleu_log.write(rst_test)\n            bleu_log.flush()\n\n        return\n\n    def _d_run_epoch(sess, epoch, mode_string='pretrain'):\n        iterator.switch_to_train_data(sess)\n        step = 0\n        while True:\n            try:\n                fetches = {\n                    \"mle_loss\": dis_loss,\n                    \"r_loss\": r_loss,\n                    \"f_loss\": f_loss,\n                    \"train_op\": dis_train_op\n                }\n                rtns = sess.run(fetches)\n                if step % 200 == 0:\n                    rst = \"D {0:6s} epoch {1:3d}, step {2:3d}: \" \\\n                          \"dis_total_loss: {3:6f}, r_loss: {4:6f}, \" \\\n                          \"f_loss: {5:6f}\".format(\n                              mode_string, epoch, step, 
rtns['mle_loss'],\n                              rtns['r_loss'], rtns['f_loss'])\n                    log.write(rst + '\\n')\n                    log.flush()\n                    print(rst)\n                step += 1\n                if step == 15 and mode_string == 'train':\n                    break\n            except tf.errors.OutOfRangeError:\n                break\n\n    tf_config = tf.ConfigProto()\n    tf_config.gpu_options.allow_growth = True\n    with tf.Session(config=tf_config) as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        # Generator pre-training\n        for g_epoch in range(config.generator_pretrain_epoch):\n            _g_train_epoch(sess, g_epoch, 'pretrain')\n            if g_epoch % 10 == 0 or \\\n                    g_epoch == config.generator_pretrain_epoch - 1:\n                _g_test_epoch(sess, g_epoch, 'valid')\n                _g_test_epoch(sess, g_epoch, 'test')\n\n        # Discriminator pre-training\n        for d_epoch in range(config.discriminator_pretrain_epoch):\n            _d_run_epoch(sess, d_epoch)\n\n        # Adversarial training\n        for update_epoch in range(config.adversial_epoch):\n            cur_epoch = update_epoch + config.generator_pretrain_epoch\n            _g_train_epoch(sess, cur_epoch, 'train')\n            _d_run_epoch(sess, cur_epoch, mode_string='train')\n            if update_epoch % 10 == 0 or \\\n                    update_epoch == config.adversial_epoch - 1:\n                _g_test_epoch(sess, cur_epoch, 'valid')\n                _g_test_epoch(sess, cur_epoch, 'test')\n\n    log.close()\n    bleu_log.close()\n\nif __name__ == '__main__':\n    tf.app.run(main=_main)\n\n\n"
  },
  {
    "path": "texar_repo/examples/sequence_tagging/README.md",
    "content": "# Sequence tagging on CoNLL-2003 #\n\nThis example builds a bi-directional LSTM-CNN model for NER task and trains on CoNLL-2003 data. Model and training are described in   \n>[(Ma et al.) End-to-end Sequence Labeling via Bi-directional LSTM-CNNs-CRF](http://www.cs.cmu.edu/~xuezhem/publications/P16-1101.pdf)\n\nThe top CRF layer is not used here.\n\n## Dataset ##\n\nThe code uses [CoNLL-2003 NER dataset](https://www.clips.uantwerpen.be/conll2003/ner/) (English). Please put data files (e.g., `eng.train.bio.conll`) under `./data` folder. Pretrained Glove word embeddings can also be used (set `load_glove=True` in [config.py](./config.py)). The Glove file should also be under `./data`. \n\n## Run ##\n\nTo train a NER model,\n\n    python ner.py\n\nThe model will begin training, and will evaluate on the validation data periodically, and evaluate on the test data after the training is done. \n\n## Results ##\n\nThe results on validation and test data is:\n\n|       |   prec   |  recall  |    F1    |\n|-------|----------|----------|----------|\n| valid |  91.18   |  92.41   |  91.79   |\n| test  |  86.13   |  88.31   |  87.21   |\n\n"
  },
  {
    "path": "texar_repo/examples/sequence_tagging/config.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"NER config.\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\nnum_epochs = 200\nchar_dim = 30\nembed_dim = 100\nhidden_size = 256\ntag_space = 128\nkeep_prob = 0.5\nbatch_size = 16\nencoder = None\nload_glove = True\n\nemb = {\n    \"name\": \"embedding\",\n    \"dim\": embed_dim,\n    \"dropout_rate\": 0.33,\n    \"dropout_strategy\": 'item'\n}\n\nchar_emb = {\n    \"name\": \"char_embedding\",\n    \"dim\": char_dim\n}\n\nconv = {\n    \"filters\": 30,\n    \"kernel_size\": [3],\n    \"conv_activation\": \"tanh\",\n    \"num_dense_layers\": 0,\n    \"dropout_rate\": 0.\n}\n\ncell = {\n    \"type\": \"LSTMCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 1.\n    },\n    \"dropout\": {\"output_keep_prob\": keep_prob},\n    \"num_layers\": 1\n}\nopt = {\n    \"optimizer\": {\n        \"type\": \"MomentumOptimizer\",\n        \"kwargs\": {\"learning_rate\": 0.1,\n                   \"momentum\": 0.9,\n                   \"use_nesterov\": True}\n    },\n    \"learning_rate_decay\": {\n        \"type\": \"inverse_time_decay\",\n        \"kwargs\": {\n            \"decay_steps\": 1,\n            \"decay_rate\": 0.05,\n            \"staircase\": True\n        },\n        \"start_decay_step\": 1\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/sequence_tagging/conll_reader.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Utilities for preprocessing and iterating over the CoNLL 2003 data.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport re\nfrom collections import defaultdict\nimport numpy as np\nimport tensorflow as tf\n\n\n# pylint: disable=invalid-name, too-many-locals\n\nMAX_CHAR_LENGTH = 45\nNUM_CHAR_PAD = 2\n\nUNK_WORD, UNK_CHAR, UNK_NER = 0, 0, 0\nPAD_WORD, PAD_CHAR, PAD_NER = 1, 1, 1\n\n# Regular expressions used to normalize digits.\nDIGIT_RE = re.compile(r\"\\d\")\n\n\ndef create_vocabs(train_path, dev_path, test_path, normalize_digits=True, min_occur=1, glove_dict=None):\n    word_vocab = defaultdict(lambda: len(word_vocab))\n    word_count = defaultdict(lambda: 0)\n    char_vocab = defaultdict(lambda: len(char_vocab))\n    ner_vocab = defaultdict(lambda: len(ner_vocab))\n\n    UNK_WORD = word_vocab[\"<unk>\"]\n    PAD_WORD = word_vocab[\"<pad>\"]\n    UNK_CHAR = char_vocab[\"<unk>\"]\n    PAD_CHAR = char_vocab[\"<pad>\"]\n    UNK_NER = ner_vocab[\"<unk>\"]\n    PAD_NER = ner_vocab[\"<pad>\"]\n\n    print(\"Creating Vocabularies:\")\n\n    for file_path in [train_path, dev_path, test_path]:\n        with open(file_path, 'r') as file:\n            for line in file:\n                line = line.strip()\n                if len(line) == 0:\n                    
continue\n\n                tokens = line.split(' ')\n                for char in tokens[1]:\n                    cid = char_vocab[char]\n\n                word = DIGIT_RE.sub(\"0\", tokens[1]) if normalize_digits else tokens[1]\n                ner = tokens[4]\n\n                if glove_dict is not None and (word in glove_dict or word.lower() in glove_dict):\n                    word_count[word] += min_occur + 1\n                elif file_path == train_path:\n                    word_count[word] += 1\n\n                nid = ner_vocab[ner]\n\n    print(\"Total Vocabulary Size: %d\" % len(word_count))\n    for word in word_count:\n        if word_count[word] > min_occur:\n            wid = word_vocab[word]\n\n    print(\"Word Vocabulary Size: %d\" % len(word_vocab))\n    print(\"Character Alphabet Size: %d\" % len(char_vocab))\n    print(\"NER Alphabet Size: %d\" % len(ner_vocab))\n\n    word_vocab = defaultdict(lambda: UNK_WORD, word_vocab)\n    char_vocab = defaultdict(lambda: UNK_CHAR, char_vocab)\n    ner_vocab = defaultdict(lambda: UNK_NER, ner_vocab)\n\n    i2w = {v: k for k, v in word_vocab.items()}\n    i2n = {v: k for k, v in ner_vocab.items()}\n    return (word_vocab, char_vocab, ner_vocab), (i2w, i2n)\n\n\ndef read_data(source_path, word_vocab, char_vocab, ner_vocab, normalize_digits=True):\n    data = []\n    print('Reading data from %s' % source_path)\n    counter = 0\n    reader = CoNLLReader(source_path, word_vocab, char_vocab, ner_vocab)\n    inst = reader.getNext(normalize_digits)\n    while inst is not None:\n        counter += 1\n        sent = inst.sentence\n        data.append([sent.word_ids, sent.char_id_seqs, inst.ner_ids])\n        inst = reader.getNext(normalize_digits)\n\n    reader.close()\n    print(\"Total number of data: %d\" % counter)\n    return data\n\n\ndef iterate_batch(data, batch_size, shuffle=False):\n    if shuffle:\n        np.random.shuffle(data)\n\n    for start_idx in range(0, len(data), batch_size):\n        excerpt = 
slice(start_idx, start_idx + batch_size)\n        batch = data[excerpt]\n\n        batch_length = max([len(batch[i][0]) for i in range(len(batch))])\n\n        wid_inputs = np.empty([len(batch), batch_length], dtype=np.int64)\n        cid_inputs = np.empty([len(batch), batch_length, MAX_CHAR_LENGTH], dtype=np.int64)\n        nid_inputs = np.empty([len(batch), batch_length], dtype=np.int64)\n        masks = np.zeros([len(batch), batch_length], dtype=np.float32)\n        lengths = np.empty(len(batch), dtype=np.int64)\n\n        for i, inst in enumerate(batch):\n            wids, cid_seqs, nids = inst\n\n            inst_size = len(wids)\n            lengths[i] = inst_size\n            # word ids\n            wid_inputs[i, :inst_size] = wids\n            wid_inputs[i, inst_size:] = PAD_WORD\n            for c, cids in enumerate(cid_seqs):\n                cid_inputs[i, c, :len(cids)] = cids\n                cid_inputs[i, c, len(cids):] = PAD_CHAR\n            cid_inputs[i, inst_size:, :] = PAD_CHAR\n            nid_inputs[i, :inst_size] = nids\n            nid_inputs[i, inst_size:] = PAD_NER\n            masks[i, :inst_size] = 1.0\n\n        yield wid_inputs, cid_inputs, nid_inputs, masks, lengths\n\n\ndef load_glove(filename, emb_dim, normalize_digits=True):\n    \"\"\"Loads embeddings in the glove text format in which each line is\n    '<word-string> <embedding-vector>'. 
Dimensions of the embedding vector\n    are separated with whitespace characters.\n\n    Args:\n        filename (str): Path to the embedding file.\n        vocab (dict): A dictionary that maps token strings to integer index.\n            Tokens not in :attr:`vocab` are not read.\n        word_vecs: A 2D numpy array of shape `[vocab_size, embed_dim]`\n            which is updated as reading from the file.\n\n    Returns:\n        The updated :attr:`word_vecs`.\n    \"\"\"\n    glove_dict = dict()\n    with tf.gfile.Open(filename) as fin:\n        for line in fin:\n            vec = line.strip().split()\n            if len(vec) == 0:\n                continue\n            word, vec = vec[0], vec[1:]\n            word = tf.compat.as_text(word)\n            word = DIGIT_RE.sub(\"0\", word) if normalize_digits else word\n            glove_dict[word] = np.array([float(v) for v in vec])\n            if len(vec) != emb_dim:\n                raise ValueError(\"Inconsistent word vector sizes: %d vs %d\" %\n                                 (len(vec), emb_dim))\n    return glove_dict\n\n\ndef construct_init_word_vecs(vocab, word_vecs, glove_dict):\n    for word, index in vocab.items():\n        if word in glove_dict:\n            embedding = glove_dict[word]\n        elif word.lower() in glove_dict:\n            embedding = glove_dict[word.lower()]\n        else: embedding = None\n\n        if embedding is not None:\n            word_vecs[index] = embedding\n    return word_vecs\n\n\nclass CoNLLReader(object):\n    def __init__(self, file_path, word_vocab, char_vocab, ner_vocab):\n        self.__source_file = open(file_path, 'r', encoding='utf-8')\n        self.__word_vocab = word_vocab\n        self.__char_vocab = char_vocab\n        self.__ner_vocab = ner_vocab\n\n    def close(self):\n        self.__source_file.close()\n\n    def getNext(self, normalize_digits=True):\n        line = self.__source_file.readline()\n        # skip multiple blank lines.\n        while 
len(line) > 0 and len(line.strip()) == 0:\n            line = self.__source_file.readline()\n        if len(line) == 0:\n            return None\n\n        lines = []\n        while len(line.strip()) > 0:\n            line = line.strip()\n            lines.append(line.split(' '))\n            line = self.__source_file.readline()\n\n        length = len(lines)\n        if length == 0:\n            return None\n\n        words = []\n        word_ids = []\n        char_seqs = []\n        char_id_seqs = []\n        ner_tags = []\n        ner_ids = []\n\n        for tokens in lines:\n            chars = []\n            char_ids = []\n            for char in tokens[1]:\n                chars.append(char)\n                char_ids.append(self.__char_vocab[char])\n            if len(chars) > MAX_CHAR_LENGTH:\n                chars = chars[:MAX_CHAR_LENGTH]\n                char_ids = char_ids[:MAX_CHAR_LENGTH]\n            char_seqs.append(chars)\n            char_id_seqs.append(char_ids)\n\n            word = DIGIT_RE.sub(\"0\", tokens[1]) if normalize_digits else tokens[1]\n            ner = tokens[4]\n\n            words.append(word)\n            word_ids.append(self.__word_vocab[word])\n\n            ner_tags.append(ner)\n            ner_ids.append(self.__ner_vocab[ner])\n\n        return NERInstance(Sentence(words, word_ids, char_seqs, char_id_seqs), ner_tags, ner_ids)\n\n\nclass NERInstance(object):\n    def __init__(self, sentence, ner_tags, ner_ids):\n        self.sentence = sentence\n        self.ner_tags = ner_tags\n        self.ner_ids = ner_ids\n\n    def length(self):\n        return self.sentence.length()\n\n\nclass Sentence(object):\n    def __init__(self, words, word_ids, char_seqs, char_id_seqs):\n        self.words = words\n        self.word_ids = word_ids\n        self.char_seqs = char_seqs\n        self.char_id_seqs = char_id_seqs\n\n    def length(self):\n        return len(self.words)\n"
  },
  {
    "path": "texar_repo/examples/sequence_tagging/conll_writer.py",
    "content": "__author__ = 'max'\n\n\nclass CoNLLWriter(object):\n    def __init__(self, i2w, i2n):\n        self.__source_file = None\n        self.__i2w = i2w\n        self.__i2n = i2n\n\n    def start(self, file_path):\n        self.__source_file = open(file_path, 'w', encoding='utf-8')\n\n    def close(self):\n        self.__source_file.close()\n\n    def write(self, word, predictions, targets, lengths):\n        batch_size, _ = word.shape\n        for i in range(batch_size):\n            for j in range(lengths[i]):\n                w = self.__i2w[word[i, j]]\n                tgt = self.__i2n[targets[i, j]]\n                pred = self.__i2n[predictions[i, j]]\n                self.__source_file.write('%d %s %s %s %s %s\\n' % (j + 1, w, \"_\", \"_\", tgt, pred))\n            self.__source_file.write('\\n')\n"
  },
  {
    "path": "texar_repo/examples/sequence_tagging/conlleval",
    "content": "#!/usr/bin/perl -w\n# conlleval: evaluate result of processing CoNLL-2000 shared task\n# usage:     conlleval [-l] [-r] [-d delimiterTag] [-o oTag] < file\n#            README: http://cnts.uia.ac.be/conll2000/chunking/output.html\n# options:   l: generate LaTeX output for tables like in\n#               http://cnts.uia.ac.be/conll2003/ner/example.tex\n#            r: accept raw result tags (without B- and I- prefix;\n#                                       assumes one word per chunk)\n#            d: alternative delimiter tag (default is single space)\n#            o: alternative outside tag (default is O)\n# note:      the file should contain lines with items separated\n#            by $delimiter characters (default space). The final\n#            two items should contain the correct tag and the \n#            guessed tag in that order. Sentences should be\n#            separated from each other by empty lines or lines\n#            with $boundary fields (default -X-).\n# url:       http://lcg-www.uia.ac.be/conll2000/chunking/\n# started:   1998-09-25\n# version:   2004-01-26\n# author:    Erik Tjong Kim Sang <erikt@uia.ua.ac.be>\n\nuse strict;\n\nmy $false = 0;\nmy $true = 42;\n\nmy $boundary = \"-X-\";     # sentence boundary\nmy $correct;              # current corpus chunk tag (I,O,B)\nmy $correctChunk = 0;     # number of correctly identified chunks\nmy $correctTags = 0;      # number of correct chunk tags\nmy $correctType;          # type of current corpus chunk tag (NP,VP,etc.)\nmy $delimiter = \" \";      # field delimiter\nmy $FB1 = 0.0;            # FB1 score (Van Rijsbergen 1979)\nmy $firstItem;            # first feature (for sentence boundary checks)\nmy $foundCorrect = 0;     # number of chunks in corpus\nmy $foundGuessed = 0;     # number of identified chunks\nmy $guessed;              # current guessed chunk tag\nmy $guessedType;          # type of current guessed chunk tag\nmy $i;                    # miscellaneous counter\nmy 
$inCorrect = $false;   # currently processed chunk is correct until now\nmy $lastCorrect = \"O\";    # previous chunk tag in corpus\nmy $latex = 0;            # generate LaTeX formatted output\nmy $lastCorrectType = \"\"; # type of previously identified chunk tag\nmy $lastGuessed = \"O\";    # previously identified chunk tag\nmy $lastGuessedType = \"\"; # type of previous chunk tag in corpus\nmy $lastType;             # temporary storage for detecting duplicates\nmy $line;                 # line\nmy $nbrOfFeatures = -1;   # number of features per line\nmy $precision = 0.0;      # precision score\nmy $oTag = \"O\";           # outside tag, default O\nmy $raw = 0;              # raw input: add B to every token\nmy $recall = 0.0;         # recall score\nmy $tokenCounter = 0;     # token counter (ignores sentence breaks)\n\nmy %correctChunk = ();    # number of correctly identified chunks per type\nmy %foundCorrect = ();    # number of chunks in corpus per type\nmy %foundGuessed = ();    # number of identified chunks per type\n\nmy @features;             # features on line\nmy @sortedTypes;          # sorted list of chunk type names\n\n# sanity check\nwhile (@ARGV and $ARGV[0] =~ /^-/) {\n   if ($ARGV[0] eq \"-l\") { $latex = 1; shift(@ARGV); }\n   elsif ($ARGV[0] eq \"-r\") { $raw = 1; shift(@ARGV); }\n   elsif ($ARGV[0] eq \"-d\") { \n      shift(@ARGV); \n      if (not defined $ARGV[0]) { \n         die \"conlleval: -d requires delimiter character\"; \n      }\n      $delimiter = shift(@ARGV);\n   } elsif ($ARGV[0] eq \"-o\") {\n      shift(@ARGV);\n      if (not defined $ARGV[0]) {\n         die \"conlleval: -o requires delimiter character\";\n      }\n      $oTag = shift(@ARGV);\n   } else { die \"conlleval: unknown argument $ARGV[0]\\n\"; }\n}\nif (@ARGV) { die \"conlleval: unexpected command line argument\\n\"; }\n# process input\nwhile (<STDIN>) {\n   chomp($line = $_);\n   @features = split(/$delimiter/,$line);\n   if ($nbrOfFeatures < 0) { $nbrOfFeatures = 
$#features; }\n   elsif ($nbrOfFeatures != $#features and @features != 0) {\n      printf STDERR \"unexpected number of features: %d (%d)\\n\",\n         $#features+1,$nbrOfFeatures+1;\n      exit(1);\n   }\n   if (@features == 0 or \n       $features[0] eq $boundary) { @features = ($boundary,\"O\",\"O\"); }\n   if (@features < 2) { \n      die \"conlleval: unexpected number of features in line $line\\n\"; \n   }\n   if ($raw) {\n      if ($features[$#features] eq $oTag) { $features[$#features] = \"O\"; } \n      if ($features[$#features-1] eq $oTag) { $features[$#features-1] = \"O\"; } \n      if ($features[$#features] ne \"O\") { \n         $features[$#features] = \"B-$features[$#features]\";\n      }\n      if ($features[$#features-1] ne \"O\") { \n         $features[$#features-1] = \"B-$features[$#features-1]\";\n      }\n   }\n   # 20040126 ET code which allows hyphens in the types\n   if ($features[$#features] =~ /^([^-]*)-(.*)$/) {\n      $guessed = $1;\n      $guessedType = $2;\n   } else { \n      $guessed = $features[$#features]; \n      $guessedType = \"\"; \n   }\n   pop(@features);\n   if ($features[$#features] =~ /^([^-]*)-(.*)$/) {\n      $correct = $1;\n      $correctType = $2;\n   } else { \n      $correct = $features[$#features]; \n      $correctType = \"\"; \n   }\n   pop(@features);\n#  ($guessed,$guessedType) = split(/-/,pop(@features));\n#  ($correct,$correctType) = split(/-/,pop(@features));\n   $guessedType = $guessedType ? $guessedType : \"\";\n   $correctType = $correctType ? 
$correctType : \"\";\n   $firstItem = shift(@features);\n\n   # 1999-06-26 sentence breaks should always be counted as out of chunk\n   if ( $firstItem eq $boundary ) { $guessed = \"O\"; }\n\n   if ($inCorrect) {\n      if ( &endOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) and\n           &endOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) and\n           $lastGuessedType eq $lastCorrectType) {\n         $inCorrect=$false;\n         $correctChunk++;\n         $correctChunk{$lastCorrectType} = $correctChunk{$lastCorrectType} ?\n             $correctChunk{$lastCorrectType}+1 : 1;\n      } elsif ( \n           &endOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) != \n           &endOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) or\n           $guessedType ne $correctType ) {\n         $inCorrect=$false; \n      }\n   }\n\n   if ( &startOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) and \n        &startOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) and\n        $guessedType eq $correctType) { $inCorrect = $true; }\n\n   if ( &startOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) ) {\n      $foundCorrect++; \n      $foundCorrect{$correctType} = $foundCorrect{$correctType} ?\n          $foundCorrect{$correctType}+1 : 1;\n   }\n   if ( &startOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) ) {\n      $foundGuessed++; \n      $foundGuessed{$guessedType} = $foundGuessed{$guessedType} ?\n          $foundGuessed{$guessedType}+1 : 1;\n   }\n   if ( $firstItem ne $boundary ) { \n      if ( $correct eq $guessed and $guessedType eq $correctType ) { \n         $correctTags++; \n      }\n      $tokenCounter++; \n   }\n\n   $lastGuessed = $guessed;\n   $lastCorrect = $correct;\n   $lastGuessedType = $guessedType;\n   $lastCorrectType = $correctType;\n}\nif ($inCorrect) { \n   $correctChunk++;\n   $correctChunk{$lastCorrectType} = $correctChunk{$lastCorrectType} ?\n       
$correctChunk{$lastCorrectType}+1 : 1;\n}\n\nif (not $latex) {\n   # compute overall precision, recall and FB1 (default values are 0.0)\n   $precision = 100*$correctChunk/$foundGuessed if ($foundGuessed > 0);\n   $recall = 100*$correctChunk/$foundCorrect if ($foundCorrect > 0);\n   $FB1 = 2*$precision*$recall/($precision+$recall)\n      if ($precision+$recall > 0);\n   \n   # print overall performance\n   printf \"processed $tokenCounter tokens with $foundCorrect phrases; \";\n   printf \"found: $foundGuessed phrases; correct: $correctChunk.\\n\";\n   if ($tokenCounter>0) {\n      printf \"accuracy: %6.2f%%; \",100*$correctTags/$tokenCounter;\n      printf \"precision: %6.2f%%; \",$precision;\n      printf \"recall: %6.2f%%; \",$recall;\n      printf \"FB1: %6.2f\\n\",$FB1;\n   }\n}\n\n# sort chunk type names\nundef($lastType);\n@sortedTypes = ();\nforeach $i (sort (keys %foundCorrect,keys %foundGuessed)) {\n   if (not($lastType) or $lastType ne $i) { \n      push(@sortedTypes,($i));\n   }\n   $lastType = $i;\n}\n# print performance per chunk type\nif (not $latex) {\n   for $i (@sortedTypes) {\n      $correctChunk{$i} = $correctChunk{$i} ? $correctChunk{$i} : 0;\n      if (not($foundGuessed{$i})) { $foundGuessed{$i} = 0; $precision = 0.0; }\n      else { $precision = 100*$correctChunk{$i}/$foundGuessed{$i}; }\n      if (not($foundCorrect{$i})) { $recall = 0.0; }\n      else { $recall = 100*$correctChunk{$i}/$foundCorrect{$i}; }\n      if ($precision+$recall == 0.0) { $FB1 = 0.0; }\n      else { $FB1 = 2*$precision*$recall/($precision+$recall); }\n      printf \"%17s: \",$i;\n      printf \"precision: %6.2f%%; \",$precision;\n      printf \"recall: %6.2f%%; \",$recall;\n      printf \"FB1: %6.2f  %d\\n\",$FB1,$foundGuessed{$i};\n   }\n} else {\n   print \"        & Precision &  Recall  & F\\$_{\\\\beta=1} \\\\\\\\\\\\hline\";\n   for $i (@sortedTypes) {\n      $correctChunk{$i} = $correctChunk{$i} ? 
$correctChunk{$i} : 0;\n      if (not($foundGuessed{$i})) { $precision = 0.0; }\n      else { $precision = 100*$correctChunk{$i}/$foundGuessed{$i}; }\n      if (not($foundCorrect{$i})) { $recall = 0.0; }\n      else { $recall = 100*$correctChunk{$i}/$foundCorrect{$i}; }\n      if ($precision+$recall == 0.0) { $FB1 = 0.0; }\n      else { $FB1 = 2*$precision*$recall/($precision+$recall); }\n      printf \"\\n%-7s &  %6.2f\\\\%% & %6.2f\\\\%% & %6.2f \\\\\\\\\",\n             $i,$precision,$recall,$FB1;\n   }\n   print \"\\\\hline\\n\";\n   $precision = 0.0;\n   $recall = 0;\n   $FB1 = 0.0;\n   $precision = 100*$correctChunk/$foundGuessed if ($foundGuessed > 0);\n   $recall = 100*$correctChunk/$foundCorrect if ($foundCorrect > 0);\n   $FB1 = 2*$precision*$recall/($precision+$recall)\n      if ($precision+$recall > 0);\n   printf STDOUT \"Overall &  %6.2f\\\\%% & %6.2f\\\\%% & %6.2f \\\\\\\\\\\\hline\\n\",\n          $precision,$recall,$FB1;\n}\n\nexit 0;\n\n# endOfChunk: checks if a chunk ended between the previous and current word\n# arguments:  previous and current chunk tags, previous and current types\n# note:       this code is capable of handling other chunk representations\n#             than the default CoNLL-2000 ones, see EACL'99 paper of Tjong\n#             Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006\n\nsub endOfChunk {\n   my $prevTag = shift(@_);\n   my $tag = shift(@_);\n   my $prevType = shift(@_);\n   my $type = shift(@_);\n   my $chunkEnd = $false;\n\n   if ( $prevTag eq \"B\" and $tag eq \"B\" ) { $chunkEnd = $true; }\n   if ( $prevTag eq \"B\" and $tag eq \"O\" ) { $chunkEnd = $true; }\n   if ( $prevTag eq \"I\" and $tag eq \"B\" ) { $chunkEnd = $true; }\n   if ( $prevTag eq \"I\" and $tag eq \"O\" ) { $chunkEnd = $true; }\n\n   if ( $prevTag eq \"E\" and $tag eq \"E\" ) { $chunkEnd = $true; }\n   if ( $prevTag eq \"E\" and $tag eq \"I\" ) { $chunkEnd = $true; }\n   if ( $prevTag eq \"E\" and $tag eq \"O\" ) { $chunkEnd = $true; }\n 
  if ( $prevTag eq \"I\" and $tag eq \"O\" ) { $chunkEnd = $true; }\n\n   if ($prevTag ne \"O\" and $prevTag ne \".\" and $prevType ne $type) { \n      $chunkEnd = $true; \n   }\n\n   # corrected 1998-12-22: these chunks are assumed to have length 1\n   if ( $prevTag eq \"]\" ) { $chunkEnd = $true; }\n   if ( $prevTag eq \"[\" ) { $chunkEnd = $true; }\n\n   return($chunkEnd);   \n}\n\n# startOfChunk: checks if a chunk started between the previous and current word\n# arguments:    previous and current chunk tags, previous and current types\n# note:         this code is capable of handling other chunk representations\n#               than the default CoNLL-2000 ones, see EACL'99 paper of Tjong\n#               Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006\n\nsub startOfChunk {\n   my $prevTag = shift(@_);\n   my $tag = shift(@_);\n   my $prevType = shift(@_);\n   my $type = shift(@_);\n   my $chunkStart = $false;\n\n   if ( $prevTag eq \"B\" and $tag eq \"B\" ) { $chunkStart = $true; }\n   if ( $prevTag eq \"I\" and $tag eq \"B\" ) { $chunkStart = $true; }\n   if ( $prevTag eq \"O\" and $tag eq \"B\" ) { $chunkStart = $true; }\n   if ( $prevTag eq \"O\" and $tag eq \"I\" ) { $chunkStart = $true; }\n\n   if ( $prevTag eq \"E\" and $tag eq \"E\" ) { $chunkStart = $true; }\n   if ( $prevTag eq \"E\" and $tag eq \"I\" ) { $chunkStart = $true; }\n   if ( $prevTag eq \"O\" and $tag eq \"E\" ) { $chunkStart = $true; }\n   if ( $prevTag eq \"O\" and $tag eq \"I\" ) { $chunkStart = $true; }\n\n   if ($tag ne \"O\" and $tag ne \".\" and $prevType ne $type) { \n      $chunkStart = $true; \n   }\n\n   # corrected 1998-12-22: these chunks are assumed to have length 1\n   if ( $tag eq \"[\" ) { $chunkStart = $true; }\n   if ( $tag eq \"]\" ) { $chunkStart = $true; }\n\n   return($chunkStart);   \n}\n"
  },
  {
    "path": "texar_repo/examples/sequence_tagging/ner.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Sequence tagging.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport time\nimport importlib\nimport numpy as np\nimport tensorflow as tf\nimport texar as tx\n\nfrom examples.sequence_tagging.conll_reader import create_vocabs, read_data, iterate_batch, load_glove, construct_init_word_vecs\nfrom examples.sequence_tagging.conll_writer import CoNLLWriter\nfrom examples.sequence_tagging import scores\n\nflags = tf.flags\n\nflags.DEFINE_string(\"data_path\", \"./data\",\n                    \"Directory containing NER data (e.g., eng.train.bio.conll).\")\nflags.DEFINE_string(\"train\", \"eng.train.bio.conll\",\n                    \"the file name of the training data.\")\nflags.DEFINE_string(\"dev\", \"eng.dev.bio.conll\",\n                    \"the file name of the dev data.\")\nflags.DEFINE_string(\"test\", \"eng.test.bio.conll\",\n                    \"the file name of the test data.\")\nflags.DEFINE_string(\"embedding\", \"glove.6B.100d.txt\",\n                    \"the file name of the GloVe embedding.\")\nflags.DEFINE_string(\"config\", \"config\", \"The config to use.\")\n\nFLAGS = flags.FLAGS\n\nconfig = importlib.import_module(FLAGS.config)\n\ntrain_path = os.path.join(FLAGS.data_path, FLAGS.train)\ndev_path = os.path.join(FLAGS.data_path, 
FLAGS.dev)\ntest_path = os.path.join(FLAGS.data_path, FLAGS.test)\nembedding_path = os.path.join(FLAGS.data_path, FLAGS.embedding)\nEMBEDD_DIM = config.embed_dim\nCHAR_DIM = config.char_dim\n\n# Prepares/loads data\nif config.load_glove:\n    print('loading GloVe embedding...')\n    glove_dict = load_glove(embedding_path, EMBEDD_DIM)\nelse:\n    glove_dict = None\n\n(word_vocab, char_vocab, ner_vocab), (i2w, i2n) = create_vocabs(train_path, dev_path, test_path, glove_dict=glove_dict)\n\ndata_train = read_data(train_path, word_vocab, char_vocab, ner_vocab)\ndata_dev = read_data(dev_path, word_vocab, char_vocab, ner_vocab)\ndata_test = read_data(test_path, word_vocab, char_vocab, ner_vocab)\n\nscale = np.sqrt(3.0 / EMBEDD_DIM)\nword_vecs = np.random.uniform(-scale, scale, [len(word_vocab), EMBEDD_DIM]).astype(np.float32)\nif config.load_glove:\n    word_vecs = construct_init_word_vecs(word_vocab, word_vecs, glove_dict)\n\nscale = np.sqrt(3.0 / CHAR_DIM)\nchar_vecs = np.random.uniform(-scale, scale, [len(char_vocab), CHAR_DIM]).astype(np.float32)\n\n# Builds TF graph\ninputs = tf.placeholder(tf.int64, [None, None])\nchars = tf.placeholder(tf.int64, [None, None, None])\ntargets = tf.placeholder(tf.int64, [None, None])\nmasks = tf.placeholder(tf.float32, [None, None])\nseq_lengths = tf.placeholder(tf.int64, [None])\n\nvocab_size = len(word_vecs)\nembedder = tx.modules.WordEmbedder(vocab_size=vocab_size, init_value=word_vecs, hparams=config.emb)\nemb_inputs = embedder(inputs)\n\nchar_size = len(char_vecs)\nchar_embedder = tx.modules.WordEmbedder(vocab_size=char_size, init_value=char_vecs, hparams=config.char_emb)\nemb_chars = char_embedder(chars)\nchar_shape = tf.shape(emb_chars) # [batch, length, char_length, char_dim]\nemb_chars = tf.reshape(emb_chars, (-1, char_shape[2], CHAR_DIM))\nchar_encoder = tx.modules.Conv1DEncoder(config.conv)\nchar_outputs = char_encoder(emb_chars)\nchar_outputs = tf.reshape(char_outputs, (char_shape[0], char_shape[1], 
config.conv['filters']))\n\nemb_inputs = tf.concat([emb_inputs, char_outputs], axis=2)\nemb_inputs = tf.nn.dropout(emb_inputs, keep_prob=0.67)\n\nencoder = tx.modules.BidirectionalRNNEncoder(hparams={\"rnn_cell_fw\": config.cell, \"rnn_cell_bw\": config.cell})\noutputs, _ = encoder(emb_inputs, sequence_length=seq_lengths)\noutputs = tf.concat(outputs, axis=2)\n\nrnn_shape = tf.shape(outputs)\noutputs = tf.reshape(outputs, (-1, 2 * config.hidden_size))\n\noutputs = tf.layers.dense(outputs, config.tag_space, activation=tf.nn.elu)\noutputs = tf.nn.dropout(outputs, keep_prob=config.keep_prob)\n\nlogits = tf.layers.dense(outputs, len(ner_vocab))\n\nlogits = tf.reshape(logits, tf.concat([rnn_shape[0:2], [len(ner_vocab)]], axis=0))\n\nmle_loss = tx.losses.sequence_sparse_softmax_cross_entropy(\n    labels=targets,\n    logits=logits,\n    sequence_length=seq_lengths,\n    average_across_batch=True,\n    average_across_timesteps=True,\n    sum_over_timesteps=False)\n\npredicts = tf.argmax(logits, axis=2)\ncorrects = tf.reduce_sum(tf.cast(tf.equal(targets, predicts), tf.float32) * masks)\n\nglobal_step = tf.placeholder(tf.int32)\ntrain_op = tx.core.get_train_op(\n    mle_loss, global_step=global_step, increment_global_step=False,\n    hparams=config.opt)\n\n# Training/eval processes\n\ndef _train_epoch(sess, epoch):\n    start_time = time.time()\n    loss = 0.\n    corr = 0.\n    num_tokens = 0.\n\n    fetches = {\n        \"mle_loss\": mle_loss,\n        \"correct\": corrects,\n    }\n    fetches[\"train_op\"] = train_op\n\n    mode = tf.estimator.ModeKeys.TRAIN\n    num_inst = 0\n    for batch in iterate_batch(data_train, config.batch_size, shuffle=True):\n        word, char, ner, mask, length = batch\n        feed_dict = {\n            inputs: word, chars: char, targets: ner, masks: mask, seq_lengths: length,\n            global_step: epoch, tx.global_mode(): mode,\n        }\n\n        rets = sess.run(fetches, feed_dict)\n        nums = np.sum(length)\n        num_inst 
+= len(word)\n        loss += rets[\"mle_loss\"] * nums\n        corr += rets[\"correct\"]\n        num_tokens += nums\n\n        print(\"train: %d (%d/%d) loss: %.4f, acc: %.2f%%\" % (epoch, num_inst, len(data_train), loss / num_tokens, corr / num_tokens * 100))\n    print(\"train: %d loss: %.4f, acc: %.2f%%, time: %.2fs\" % (epoch, loss / num_tokens, corr / num_tokens * 100, time.time() - start_time))\n\n\ndef _eval(sess, epoch, data_tag):\n    fetches = {\n        \"predicts\": predicts,\n    }\n    mode = tf.estimator.ModeKeys.EVAL\n    file_name = 'tmp/%s%d' % (data_tag, epoch)\n    writer = CoNLLWriter(i2w, i2n)\n    writer.start(file_name)\n    data = data_dev if data_tag == 'dev' else data_test\n    for batch in iterate_batch(data, config.batch_size, shuffle=False):\n        word, char, ner, mask, length = batch\n        feed_dict = {\n            inputs: word, chars: char, targets: ner, masks: mask, seq_lengths: length,\n            global_step: epoch, tx.global_mode(): mode,\n        }\n        rets = sess.run(fetches, feed_dict)\n        predictions = rets['predicts']\n        writer.write(word, predictions, ner, length)\n    writer.close()\n    acc, precision, recall, f1 = scores.scores(file_name)\n    print('%s acc: %.2f%%, precision: %.2f%%, recall: %.2f%%, F1: %.2f%%' % (data_tag, acc, precision, recall, f1))\n    return acc, precision, recall, f1\n\n\nwith tf.Session() as sess:\n    sess.run(tf.global_variables_initializer())\n    sess.run(tf.local_variables_initializer())\n    sess.run(tf.tables_initializer())\n\n    dev_f1 = 0.0\n    dev_acc = 0.0\n    dev_precision = 0.0\n    dev_recall = 0.0\n    best_epoch = 0\n\n    test_f1 = 0.0\n    test_acc = 0.0\n    test_prec = 0.0\n    test_recall = 0.0\n\n    tx.utils.maybe_create_dir('./tmp')\n\n    for epoch in range(config.num_epochs):\n        _train_epoch(sess, epoch)\n        acc, precision, recall, f1 = _eval(sess, epoch, 'dev')\n        if dev_f1 < f1:\n            dev_f1 = f1\n            
dev_acc = acc\n            dev_precision = precision\n            dev_recall = recall\n            best_epoch = epoch\n            test_acc, test_prec, test_recall, test_f1 = _eval(sess, epoch, 'test')\n        print('best acc: %.2f%%, precision: %.2f%%, recall: %.2f%%, F1: %.2f%%, epoch: %d' % (dev_acc, dev_precision, dev_recall, dev_f1, best_epoch))\n        print('test acc: %.2f%%, precision: %.2f%%, recall: %.2f%%, F1: %.2f%%, epoch: %d' % (test_acc, test_prec, test_recall, test_f1, best_epoch))\n        print('---------------------------------------------------')\n"
  },
  {
    "path": "texar_repo/examples/sequence_tagging/scores.py",
    "content": "import subprocess\nimport sys \n\ndef scores(path):\n  bashCommand = 'perl conlleval'\n  process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE, stdin=open(path))\n  output, error = process.communicate()\n  output = output.decode().split('\\n')[1].split('%; ')\n  output = [out.split(' ')[-1] for out in output]\n  acc, prec, recall, fb1 = tuple(output)\n  return float(acc), float(prec), float(recall), float(fb1)\n  \n"
  },
  {
    "path": "texar_repo/examples/text_style_transfer/README.md",
    "content": "# Text Style Transfer #\n\nThis example implements a simplified variant of the `ctrl-gen` model from \n\n[Toward Controlled Generation of Text](https://arxiv.org/pdf/1703.00955.pdf)  \n*Zhiting Hu, Zichao Yang, Xiaodan Liang, Ruslan Salakhutdinov, Eric Xing; ICML 2017*\n\nThe model roughly has an architecture of `Encoder--Decoder--Classifier`. Compared to the paper, following simplifications are made:\n  \n  * Replaces the base Variational Autoencoder (VAE) model with an attentional Autoencoder (AE) -- VAE is not necessary in the text style transfer setting since we do not need to interpolate the latent space as in the paper.\n  * Attribute classifier (i.e., discriminator) is trained with real data only. Samples generated by the decoder are not used.\n  * Independency constraint is omitted.\n\n## Usage ##\n\n### Dataset ###\nDownload the yelp sentiment dataset with the following cmd:\n```\npython prepare_data.py\n```\n\n### Train the model ###\n\nTrain the model on the above data to do sentiment transfer.\n```\npython main.py --config config\n```\n\n[config.py](./config.py) contains the data and mode configurations. \n\n* The model will first be pre-trained for a few epochs (specified in `config.py`). During pre-training, the `Encoder-Decoder` part is trained as an autoencoder, while the `Classifier` part is trained with the classification labels.\n* Full-training is then performed for another few epochs. 
During full-training, the `Classifier` part is fixed, and the `Encoder-Decoder` part is trained to fit the classifier, along with continuing to minimize the autoencoding loss.\n\nTraining log is printed as below:\n```\ngamma: 1.0, lambda_g: 0.0\nstep: 1, loss_d: 0.6903 accu_d: 0.5625\nstep: 1, loss_g_clas: 0.6991 loss_g: 9.1452 accu_g: 0.2812 loss_g_ae: 9.1452 accu_g_gdy: 0.2969\nstep: 500, loss_d: 0.0989 accu_d: 0.9688\nstep: 500, loss_g_clas: 0.2985 loss_g: 3.9696 accu_g: 0.8891 loss_g_ae: 3.9696 accu_g_gdy: 0.7734\n...\nstep: 6500, loss_d: 0.0806 accu_d: 0.9703\nstep: 6500, loss_g_clas: 5.7137 loss_g: 0.2887 accu_g: 0.0844 loss_g_ae: 0.2887 accu_g_gdy: 0.0625\nepoch: 1, loss_d: 0.0876 accu_d: 0.9719\nepoch: 1, loss_g_clas: 6.7360 loss_g: 0.2195 accu_g: 0.0627 loss_g_ae: 0.2195 accu_g_gdy: 0.0642\nval: accu_g: 0.0445 loss_g_ae: 0.1302 accu_d: 0.9774 bleu: 90.7896 loss_g: 0.1302 loss_d: 0.0666 loss_g_clas: 7.0310 accu_g_gdy: 0.0482\n...\n\n```\nwhere:\n- `loss_d` and `accu_d` are the classification loss/accuracy of the `Classifier` part.\n- `loss_g_clas` is the classification loss of the generated sentences.\n- `loss_g_ae` is the autoencoding loss.\n- `loss_g` is the joint loss `= loss_g_ae + lambda_g * loss_g_clas`.\n- `accu_g` is the classification accuracy of the generated sentences with soft representations (i.e., Gumbel-softmax).\n- `accu_g_gdy` is the classification accuracy of the generated sentences with greedy decoding.\n- `bleu` is the BLEU score between the generated and input sentences.\n\n## Results ##\n\nText style transfer has two primary goals:\n1. The generated sentence should have desired attribute (e.g., positive/negative sentiment)\n2. The generated sentence should keep the content of the original one\n\nWe use automatic metrics to evaluate both: \n* For (1), we can use a pre-trained classifier to classify the generated sentences and evaluate the accuracy (the higher the better). 
In this code we have not implemented a stand-alone classifier for evaluation, which could be very easy though. The `Classifier` part in the model gives a reasonably good estimation (i.e., `accu_g_gdy` in the above) of the accuracy. \n* For (2), we evaluate the BLEU score between the generated sentences and the original sentences, i.e., `bleu` in the above (the higher the better) (See [Yang et al., 2018](https://arxiv.org/pdf/1805.11749.pdf) for more details.)\n\nThe implementation here gives the following performance after 10 epochs of pre-training and 2 epochs of full-training:\n\n| Accuracy (by the `Classifier` part)  | BLEU (with the original sentence) |\n| -------------------------------------| ----------------------------------|\n| 0.92 | 54.0  |\n\nAlso refer to the following papers that used this code and compared to other text style transfer approaches:\n\n* [Unsupervised Text Style Transfer using Language Models as Discriminators](https://papers.nips.cc/paper/7959-unsupervised-text-style-transfer-using-language-models-as-discriminators.pdf). Zichao Yang, Zhiting Hu, Chris Dyer, Eric Xing, Taylor Berg-Kirkpatrick. NeurIPS 2018\n* [Structured Content Preservation for Unsupervised Text Style Transfer](https://arxiv.org/pdf/1810.06526.pdf). Youzhi Tian, Zhiting Hu, Zhou Yu. 2018\n\n### Samples ###\nHere are some randomly-picked samples. In each pair, the first sentence is the original sentence and the second is the generated.\n```\ngo to place for client visits with gorgeous views .\ngo to place for client visits with lacking views .\n\nthere was lots of people but they still managed to provide great service .\nthere was lots of people but they still managed to provide careless service .\n\nthis was the best dining experience i have ever had .\nthis was the worst dining experience i have ever had .\n\nneedless to say , we skipped desert .\ngentle to say , we edgy desert . 
\n\nthe first time i was missing an entire sandwich and a side of fries .\nthe first time i was beautifully an entire sandwich and a side of fries .\n\nher boutique has a fabulous selection of designer brands !\nher annoying has a sketchy selection of bland warned !\n\nservice is pretty good .\nservice is trashy rude .\n\nok nothing new .\nexceptional impressed new .\n```\n"
  },
  {
    "path": "texar_repo/examples/text_style_transfer/config.py",
    "content": "\"\"\"Config\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name\n\nimport copy\n\nmax_nepochs = 12 # Total number of training epochs\n                 # (including pre-train and full-train)\npretrain_nepochs = 10 # Number of pre-train epochs (training as autoencoder)\ndisplay = 500  # Display the training results every N training steps.\ndisplay_eval = 1e10 # Display the dev results every N training steps (set to a\n                    # very large value to disable it).\nsample_path = './samples'\ncheckpoint_path = './checkpoints'\nrestore = ''   # Model snapshot to restore from\n\nlambda_g = 0.1    # Weight of the classification loss\ngamma_decay = 0.5 # Gumbel-softmax temperature anneal rate\n\ntrain_data = {\n    'batch_size': 64,\n    #'seed': 123,\n    'datasets': [\n        {\n            'files': './data/yelp/sentiment.train.text',\n            'vocab_file': './data/yelp/vocab',\n            'data_name': ''\n        },\n        {\n            'files': './data/yelp/sentiment.train.labels',\n            'data_type': 'int',\n            'data_name': 'labels'\n        }\n    ],\n    'name': 'train'\n}\n\nval_data = copy.deepcopy(train_data)\nval_data['datasets'][0]['files'] = './data/yelp/sentiment.dev.text'\nval_data['datasets'][1]['files'] = './data/yelp/sentiment.dev.labels'\n\ntest_data = copy.deepcopy(train_data)\ntest_data['datasets'][0]['files'] = './data/yelp/sentiment.test.text'\ntest_data['datasets'][1]['files'] = './data/yelp/sentiment.test.labels'\n\nmodel = {\n    'dim_c': 200,\n    'dim_z': 500,\n    'embedder': {\n        'dim': 100,\n    },\n    'encoder': {\n        'rnn_cell': {\n            'type': 'GRUCell',\n            'kwargs': {\n                'num_units': 700\n            },\n            'dropout': {\n                'input_keep_prob': 0.5\n            }\n        }\n    },\n    'decoder': {\n        'rnn_cell': {\n     
       'type': 'GRUCell',\n            'kwargs': {\n                'num_units': 700,\n            },\n            'dropout': {\n                'input_keep_prob': 0.5,\n                'output_keep_prob': 0.5\n            },\n        },\n        'attention': {\n            'type': 'BahdanauAttention',\n            'kwargs': {\n                'num_units': 700,\n            },\n            'attention_layer_size': 700,\n        },\n        'max_decoding_length_train': 21,\n        'max_decoding_length_infer': 20,\n    },\n    'classifier': {\n        'kernel_size': [3, 4, 5],\n        'filters': 128,\n        'other_conv_kwargs': {'padding': 'same'},\n        'dropout_conv': [1],\n        'dropout_rate': 0.5,\n        'num_dense_layers': 0,\n        'num_classes': 1\n    },\n    'opt': {\n        'optimizer': {\n            'type':  'AdamOptimizer',\n            'kwargs': {\n                'learning_rate': 5e-4,\n            },\n        },\n    },\n}\n"
  },
  {
    "path": "texar_repo/examples/text_style_transfer/ctrl_gen_model.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Text style transfer\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name, too-many-locals\n\nimport tensorflow as tf\n\nimport texar as tx\nfrom texar.modules import WordEmbedder, UnidirectionalRNNEncoder, \\\n        MLPTransformConnector, AttentionRNNDecoder, \\\n        GumbelSoftmaxEmbeddingHelper, Conv1DClassifier\nfrom texar.core import get_train_op\nfrom texar.utils import collect_trainable_variables, get_batch_size\n\n\nclass CtrlGenModel(object):\n    \"\"\"Control\n    \"\"\"\n\n    def __init__(self, inputs, vocab, gamma, lambda_g, hparams=None):\n        self._hparams = tx.HParams(hparams, None)\n        self._build_model(inputs, vocab, gamma, lambda_g)\n\n    def _build_model(self, inputs, vocab, gamma, lambda_g):\n        \"\"\"Builds the model.\n        \"\"\"\n        embedder = WordEmbedder(\n            vocab_size=vocab.size,\n            hparams=self._hparams.embedder)\n        encoder = UnidirectionalRNNEncoder(hparams=self._hparams.encoder)\n\n        # text_ids for encoder, with BOS token removed\n        enc_text_ids = inputs['text_ids'][:, 1:]\n        enc_outputs, final_state = encoder(embedder(enc_text_ids),\n                                           sequence_length=inputs['length']-1)\n        z = 
final_state[:, self._hparams.dim_c:]\n\n        # Encodes label\n        label_connector = MLPTransformConnector(self._hparams.dim_c)\n\n        # Gets the sentence representation: h = (c, z)\n        labels = tf.to_float(tf.reshape(inputs['labels'], [-1, 1]))\n        c = label_connector(labels)\n        c_ = label_connector(1 - labels)\n        h = tf.concat([c, z], 1)\n        h_ = tf.concat([c_, z], 1)\n\n        # Teacher-force decoding and the auto-encoding loss for G\n        decoder = AttentionRNNDecoder(\n            memory=enc_outputs,\n            memory_sequence_length=inputs['length']-1,\n            cell_input_fn=lambda inputs, attention: inputs,\n            vocab_size=vocab.size,\n            hparams=self._hparams.decoder)\n\n        connector = MLPTransformConnector(decoder.state_size)\n\n        g_outputs, _, _ = decoder(\n            initial_state=connector(h), inputs=inputs['text_ids'],\n            embedding=embedder, sequence_length=inputs['length']-1)\n\n        loss_g_ae = tx.losses.sequence_sparse_softmax_cross_entropy(\n            labels=inputs['text_ids'][:, 1:],\n            logits=g_outputs.logits,\n            sequence_length=inputs['length']-1,\n            average_across_timesteps=True,\n            sum_over_timesteps=False)\n\n        # Gumbel-softmax decoding, used in training\n        start_tokens = tf.ones_like(inputs['labels']) * vocab.bos_token_id\n        end_token = vocab.eos_token_id\n        gumbel_helper = GumbelSoftmaxEmbeddingHelper(\n            embedder.embedding, start_tokens, end_token, gamma)\n\n        soft_outputs_, _, soft_length_, = decoder(\n            helper=gumbel_helper, initial_state=connector(h_))\n\n        # Greedy decoding, used in eval\n        outputs_, _, length_ = decoder(\n            decoding_strategy='infer_greedy', initial_state=connector(h_),\n            embedding=embedder, start_tokens=start_tokens, end_token=end_token)\n\n        # Creates classifier\n        classifier = 
Conv1DClassifier(hparams=self._hparams.classifier)\n        clas_embedder = WordEmbedder(vocab_size=vocab.size,\n                                     hparams=self._hparams.embedder)\n\n        # Classification loss for the classifier\n        clas_logits, clas_preds = classifier(\n            inputs=clas_embedder(ids=inputs['text_ids'][:, 1:]),\n            sequence_length=inputs['length']-1)\n        loss_d_clas = tf.nn.sigmoid_cross_entropy_with_logits(\n            labels=tf.to_float(inputs['labels']), logits=clas_logits)\n        loss_d_clas = tf.reduce_mean(loss_d_clas)\n        accu_d = tx.evals.accuracy(labels=inputs['labels'], preds=clas_preds)\n\n        # Classification loss for the generator, based on soft samples\n        soft_logits, soft_preds = classifier(\n            inputs=clas_embedder(soft_ids=soft_outputs_.sample_id),\n            sequence_length=soft_length_)\n        loss_g_clas = tf.nn.sigmoid_cross_entropy_with_logits(\n            labels=tf.to_float(1-inputs['labels']), logits=soft_logits)\n        loss_g_clas = tf.reduce_mean(loss_g_clas)\n\n        # Accuracy on soft samples, for training progress monitoring\n        accu_g = tx.evals.accuracy(labels=1-inputs['labels'], preds=soft_preds)\n\n        # Accuracy on greedy-decoded samples, for training progress monitoring\n        _, gdy_preds = classifier(\n            inputs=clas_embedder(ids=outputs_.sample_id),\n            sequence_length=length_)\n        accu_g_gdy = tx.evals.accuracy(\n            labels=1-inputs['labels'], preds=gdy_preds)\n\n        # Aggregates losses\n        loss_g = loss_g_ae + lambda_g * loss_g_clas\n        loss_d = loss_d_clas\n\n        # Creates optimizers\n        g_vars = collect_trainable_variables(\n            [embedder, encoder, label_connector, connector, decoder])\n        d_vars = collect_trainable_variables([clas_embedder, classifier])\n\n        train_op_g = get_train_op(\n            loss_g, g_vars, hparams=self._hparams.opt)\n        
train_op_g_ae = get_train_op(\n            loss_g_ae, g_vars, hparams=self._hparams.opt)\n        train_op_d = get_train_op(\n            loss_d, d_vars, hparams=self._hparams.opt)\n\n        # Interface tensors\n        self.losses = {\n            \"loss_g\": loss_g,\n            \"loss_g_ae\": loss_g_ae,\n            \"loss_g_clas\": loss_g_clas,\n            \"loss_d\": loss_d_clas\n        }\n        self.metrics = {\n            \"accu_d\": accu_d,\n            \"accu_g\": accu_g,\n            \"accu_g_gdy\": accu_g_gdy,\n        }\n        self.train_ops = {\n            \"train_op_g\": train_op_g,\n            \"train_op_g_ae\": train_op_g_ae,\n            \"train_op_d\": train_op_d\n        }\n        self.samples = {\n            \"original\": inputs['text_ids'][:, 1:],\n            \"transferred\": outputs_.sample_id\n        }\n\n        self.fetches_train_g = {\n            \"loss_g\": self.train_ops[\"train_op_g\"],\n            \"loss_g_ae\": self.losses[\"loss_g_ae\"],\n            \"loss_g_clas\": self.losses[\"loss_g_clas\"],\n            \"accu_g\": self.metrics[\"accu_g\"],\n            \"accu_g_gdy\": self.metrics[\"accu_g_gdy\"],\n        }\n        self.fetches_train_d = {\n            \"loss_d\": self.train_ops[\"train_op_d\"],\n            \"accu_d\": self.metrics[\"accu_d\"]\n        }\n        fetches_eval = {\"batch_size\": get_batch_size(inputs['text_ids'])}\n        fetches_eval.update(self.losses)\n        fetches_eval.update(self.metrics)\n        fetches_eval.update(self.samples)\n        self.fetches_eval = fetches_eval\n\n"
  },
  {
    "path": "texar_repo/examples/text_style_transfer/main.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Text style transfer\n\nThis is a simplified implementation of:\n\nToward Controlled Generation of Text, ICML2017\nZhiting Hu, Zichao Yang, Xiaodan Liang, Ruslan Salakhutdinov, Eric Xing\n\nDownload the data with the cmd:\n\n$ python prepare_data.py\n\nTrain the model with the cmd:\n\n$ python main.py --config config\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name, too-many-locals, too-many-arguments, no-member\n\nimport os\nimport importlib\nimport numpy as np\nimport tensorflow as tf\nimport texar as tx\n\nfrom ctrl_gen_model import CtrlGenModel\n\nflags = tf.flags\n\nflags.DEFINE_string('config', 'config', 'The config to use.')\n\nFLAGS = flags.FLAGS\n\nconfig = importlib.import_module(FLAGS.config)\n\ndef _main(_):\n    # Data\n    train_data = tx.data.MultiAlignedData(config.train_data)\n    val_data = tx.data.MultiAlignedData(config.val_data)\n    test_data = tx.data.MultiAlignedData(config.test_data)\n    vocab = train_data.vocab(0)\n\n    # Each training batch is used twice: once for updating the generator and\n    # once for updating the discriminator. 
Feedable data iterator is used for\n    # such case.\n    iterator = tx.data.FeedableDataIterator(\n        {'train_g': train_data, 'train_d': train_data,\n         'val': val_data, 'test': test_data})\n    batch = iterator.get_next()\n\n    # Model\n    gamma = tf.placeholder(dtype=tf.float32, shape=[], name='gamma')\n    lambda_g = tf.placeholder(dtype=tf.float32, shape=[], name='lambda_g')\n    model = CtrlGenModel(batch, vocab, gamma, lambda_g, config.model)\n\n    def _train_epoch(sess, gamma_, lambda_g_, epoch, verbose=True):\n        avg_meters_d = tx.utils.AverageRecorder(size=10)\n        avg_meters_g = tx.utils.AverageRecorder(size=10)\n\n        step = 0\n        while True:\n            try:\n                step += 1\n                feed_dict = {\n                    iterator.handle: iterator.get_handle(sess, 'train_d'),\n                    gamma: gamma_,\n                    lambda_g: lambda_g_\n                }\n\n                vals_d = sess.run(model.fetches_train_d, feed_dict=feed_dict)\n                avg_meters_d.add(vals_d)\n\n                feed_dict = {\n                    iterator.handle: iterator.get_handle(sess, 'train_g'),\n                    gamma: gamma_,\n                    lambda_g: lambda_g_\n                }\n                vals_g = sess.run(model.fetches_train_g, feed_dict=feed_dict)\n                avg_meters_g.add(vals_g)\n\n                if verbose and (step == 1 or step % config.display == 0):\n                    print('step: {}, {}'.format(step, avg_meters_d.to_str(4)))\n                    print('step: {}, {}'.format(step, avg_meters_g.to_str(4)))\n\n                if verbose and step % config.display_eval == 0:\n                    iterator.restart_dataset(sess, 'val')\n                    _eval_epoch(sess, gamma_, lambda_g_, epoch)\n\n            except tf.errors.OutOfRangeError:\n                print('epoch: {}, {}'.format(epoch, avg_meters_d.to_str(4)))\n                print('epoch: {}, {}'.format(epoch, 
avg_meters_g.to_str(4)))\n                break\n\n    def _eval_epoch(sess, gamma_, lambda_g_, epoch, val_or_test='val'):\n        avg_meters = tx.utils.AverageRecorder()\n\n        while True:\n            try:\n                feed_dict = {\n                    iterator.handle: iterator.get_handle(sess, val_or_test),\n                    gamma: gamma_,\n                    lambda_g: lambda_g_,\n                    tx.context.global_mode(): tf.estimator.ModeKeys.EVAL\n                }\n\n                vals = sess.run(model.fetches_eval, feed_dict=feed_dict)\n\n                batch_size = vals.pop('batch_size')\n\n                # Computes BLEU\n                samples = tx.utils.dict_pop(vals, list(model.samples.keys()))\n                hyps = tx.utils.map_ids_to_strs(samples['transferred'], vocab)\n\n                refs = tx.utils.map_ids_to_strs(samples['original'], vocab)\n                refs = np.expand_dims(refs, axis=1)\n\n                bleu = tx.evals.corpus_bleu_moses(refs, hyps)\n                vals['bleu'] = bleu\n\n                avg_meters.add(vals, weight=batch_size)\n\n                # Writes samples\n                tx.utils.write_paired_text(\n                    refs.squeeze(), hyps,\n                    os.path.join(config.sample_path, 'val.%d'%epoch),\n                    append=True, mode='v')\n\n            except tf.errors.OutOfRangeError:\n                print('{}: {}'.format(\n                    val_or_test, avg_meters.to_str(precision=4)))\n                break\n\n        return avg_meters.avg()\n\n    tf.gfile.MakeDirs(config.sample_path)\n    tf.gfile.MakeDirs(config.checkpoint_path)\n\n    # Runs the logics\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        saver = tf.train.Saver(max_to_keep=None)\n        if config.restore:\n            print('Restore from: 
{}'.format(config.restore))\n            saver.restore(sess, config.restore)\n\n        iterator.initialize_dataset(sess)\n\n        gamma_ = 1.\n        lambda_g_ = 0.\n        for epoch in range(1, config.max_nepochs+1):\n            if epoch > config.pretrain_nepochs:\n                # Anneals the gumbel-softmax temperature\n                gamma_ = max(0.001, gamma_ * config.gamma_decay)\n                lambda_g_ = config.lambda_g\n            print('gamma: {}, lambda_g: {}'.format(gamma_, lambda_g_))\n\n            # Train\n            iterator.restart_dataset(sess, ['train_g', 'train_d'])\n            _train_epoch(sess, gamma_, lambda_g_, epoch)\n\n            # Val\n            iterator.restart_dataset(sess, 'val')\n            _eval_epoch(sess, gamma_, lambda_g_, epoch, 'val')\n\n            saver.save(\n                sess, os.path.join(config.checkpoint_path, 'ckpt'), epoch)\n\n            # Test\n            iterator.restart_dataset(sess, 'test')\n            _eval_epoch(sess, gamma_, lambda_g_, epoch, 'test')\n\nif __name__ == '__main__':\n    tf.app.run(main=_main)\n"
  },
  {
    "path": "texar_repo/examples/text_style_transfer/prepare_data.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Downloads data.\n\"\"\"\nimport texar as tx\n\n# pylint: disable=invalid-name\n\ndef prepare_data():\n    \"\"\"Downloads data.\n    \"\"\"\n    tx.data.maybe_download(\n        urls='https://drive.google.com/file/d/'\n             '1HaUKEYDBEk6GlJGmXwqYteB-4rS9q8Lg/view?usp=sharing',\n        path='./',\n        filenames='yelp.zip',\n        extract=True)\n\ndef main():\n    \"\"\"Entrypoint.\n    \"\"\"\n    prepare_data()\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "texar_repo/examples/torchtext/.gitignore",
    "content": ".data/\n.vector_cache/"
  },
  {
    "path": "texar_repo/examples/torchtext/README.md",
    "content": "# Data loading with torchtext #\n\nThis example demonstrates the use of [torchtext](https://github.com/pytorch/text) package as data loader for Texar models. \n\n## Usage ##\n\nThe following command trains a small-sized language model on PTB:\n\n```\npython lm_torchtext.py --config config_small\n```\n"
  },
  {
    "path": "texar_repo/examples/torchtext/batchfirst_bptt.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport math\nfrom torchtext.data import BPTTIterator, Dataset, Batch\n\n\nclass BatchFirstBPTTIterator(BPTTIterator):\n    \"\"\"Defines an iterator for language modeling tasks that use BPTT.\n\n    Provides contiguous streams of examples together with targets that are\n    one timestep further forward, for language modeling training with\n    backpropagation through time (BPTT). Expects a Dataset with a single\n    example and a single field called 'text' and produces Batches with text and\n    target attributes.\n\n    All batches will have sizes [batch_size, bptt_len]\n\n    Attributes:\n        dataset: The Dataset object to load Examples from.\n        batch_size: Batch size.\n        bptt_len: Length of sequences for backpropagation through time.\n        sort_key: A key to use for sorting examples in order to batch together\n            examples with similar lengths and minimize padding. 
The sort_key\n            provided to the Iterator constructor overrides the sort_key\n            attribute of the Dataset, or defers to it if None.\n        train: Whether the iterator represents a train set.\n        repeat: Whether to repeat the iterator for multiple epochs.\n        shuffle: Whether to shuffle examples between epochs.\n        sort: Whether to sort examples according to self.sort_key.\n            Note that repeat, shuffle, and sort default to train, train, and\n            (not train).\n        device: Device to create batches on. Use -1 for CPU and None for the\n            currently active GPU device.\n    \"\"\"\n\n    def __len__(self):\n        return math.floor(\n            (len(self.dataset[0].text) / self.batch_size - 1) / self.bptt_len)\n\n    def __iter__(self):\n        text = self.dataset[0].text\n        TEXT = self.dataset.fields['text']\n        TEXT.eos_token = None\n        pad_num = int(math.ceil(len(text) / self.batch_size) * self.batch_size \\\n                      - len(text))\n        text = text + ([TEXT.pad_token] * pad_num)\n        data = TEXT.numericalize([text], device=self.device)\n        data = data.view(self.batch_size, -1).contiguous()\n        dataset = Dataset(examples=self.dataset.examples,\n                          fields=[('text', TEXT), ('target', TEXT)])\n        while True:\n            for i in range(0, len(self) * self.bptt_len, self.bptt_len):\n                self.iterations += 1\n                seq_len = self.bptt_len\n                yield Batch.fromvars(\n                    dataset, self.batch_size,\n                    text=data[:, i:i + seq_len],\n                    target=data[:, i + 1:i + 1 + seq_len])\n            if not self.repeat:\n                return\n"
  },
  {
    "path": "texar_repo/examples/torchtext/config_small.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"PTB LM small size config.\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\ninit_scale = 0.1\nnum_epochs = 13\nhidden_size = 200\nkeep_prob = 1.0\nbatch_size = 20\nnum_steps = 20\n\ncell = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": keep_prob},\n    \"num_layers\": 2\n}\nemb = {\n    \"dim\": hidden_size\n}\nopt = {\n    \"optimizer\": {\n        \"type\": \"GradientDescentOptimizer\",\n        \"kwargs\": {\"learning_rate\": 1.0}\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 5.}\n    },\n    \"learning_rate_decay\": {\n        \"type\": \"exponential_decay\",\n        \"kwargs\": {\n            \"decay_steps\": 1,\n            \"decay_rate\": 0.5,\n            \"staircase\": True\n        },\n        \"start_decay_step\": 3\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/torchtext/lm_torchtext.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\" Language Modeling example using torchtext\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport time\nimport importlib\nimport numpy as np\nimport tensorflow as tf\nimport texar as tx\n\nfrom torchtext import data\nfrom torchtext import datasets\n\nfrom batchfirst_bptt import BatchFirstBPTTIterator\n\n# pylint: disable=invalid-name, too-many-locals, no-member\n\nflags = tf.flags\n\nflags.DEFINE_string(\"data_path\", \"./\",\n                    \"Directory containing PTB raw data (e.g., ptb.train.txt). \"\n                    \"E.g., ./simple-examples/data. 
If not exists, \"\n                    \"the directory will be created and PTB raw data will \"\n                    \"be downloaded.\")\nflags.DEFINE_string(\"config\", \"config_small\", \"The config to use.\")\n\nFLAGS = flags.FLAGS\n\nconfig = importlib.import_module(FLAGS.config)\n\n\ndef _main(_):\n    # Data\n    batch_size = config.batch_size\n    num_steps = config.num_steps\n\n    # setup vocabulary and data iterators with torchtext\n    TEXT = data.Field()\n    # make splits for data\n    train, valid, test = datasets.PennTreebank.splits(TEXT)\n\n    # build the vocabulary\n    TEXT.build_vocab(train, vectors=None)\n    vocab_size = len(TEXT.vocab)\n\n    # make iterator for splits\n    train_iter, valid_iter, test_iter = BatchFirstBPTTIterator.splits(\n        (train, valid, test), batch_size=batch_size, bptt_len=num_steps,\n        repeat=False)\n\n    inputs = tf.placeholder(tf.int32, [batch_size, num_steps])\n    targets = tf.placeholder(tf.int32, [batch_size, num_steps])\n\n    # Model architecture\n    initializer = tf.random_uniform_initializer(\n        -config.init_scale, config.init_scale)\n    with tf.variable_scope(\"model\", initializer=initializer):\n        embedder = tx.modules.WordEmbedder(\n            vocab_size=vocab_size, hparams=config.emb)\n        emb_inputs = embedder(inputs)\n        if config.keep_prob < 1:\n            emb_inputs = tf.nn.dropout(\n                emb_inputs, tx.utils.switch_dropout(config.keep_prob))\n\n        decoder = tx.modules.BasicRNNDecoder(\n            vocab_size=vocab_size, hparams={\"rnn_cell\": config.cell})\n        initial_state = decoder.zero_state(batch_size, tf.float32)\n        outputs, final_state, seq_lengths = decoder(\n            decoding_strategy=\"train_greedy\",\n            impute_finished=True,\n            inputs=emb_inputs,\n            sequence_length=[num_steps] * batch_size,\n            initial_state=initial_state)\n\n    # Losses & train ops\n    mle_loss = 
tx.losses.sequence_sparse_softmax_cross_entropy(\n        labels=targets,\n        logits=outputs.logits,\n        sequence_length=seq_lengths)\n\n    # Use global_step to pass epoch, for lr decay\n    global_step = tf.placeholder(tf.int32)\n    train_op = tx.core.get_train_op(\n        mle_loss, global_step=global_step, increment_global_step=False,\n        hparams=config.opt)\n\n    def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):\n        start_time = time.time()\n        loss = 0.\n        iters = 0\n        state = sess.run(initial_state)\n\n        fetches = {\n            \"mle_loss\": mle_loss,\n            \"final_state\": final_state,\n        }\n        if is_train:\n            fetches[\"train_op\"] = train_op\n\n        mode = (tf.estimator.ModeKeys.TRAIN\n                if is_train\n                else tf.estimator.ModeKeys.EVAL)\n        epoch_size = (len(train) // batch_size - 1) // num_steps\n        for step, data_batch in enumerate(data_iter):\n            feed_dict = {\n                inputs: data_batch.text,\n                targets: data_batch.target,\n                global_step: epoch,\n                tx.global_mode(): mode,\n            }\n            for i, (c, h) in enumerate(initial_state):\n                feed_dict[c] = state[i].c\n                feed_dict[h] = state[i].h\n\n            rets = sess.run(fetches, feed_dict)\n            loss += rets[\"mle_loss\"]\n            state = rets[\"final_state\"]\n            iters += num_steps\n\n            ppl = np.exp(loss / iters)\n            if verbose and step % (epoch_size // 10) == 10:\n                print(\"%.3f perplexity: %.3f speed: %.0f wps\" %\n                      (step * 1.0 / epoch_size, ppl,\n                       iters * batch_size / (time.time() - start_time)))\n\n        ppl = np.exp(loss / iters)\n        return ppl\n\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        
sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        for epoch in range(config.num_epochs):\n            # Train\n            train_ppl = _run_epoch(\n                sess, train_iter, epoch, is_train=True, verbose=True)\n            print(\"Epoch: %d Train Perplexity: %.3f\" % (epoch, train_ppl))\n            # Valid\n            valid_ppl = _run_epoch(sess, valid_iter, epoch)\n            print(\"Epoch: %d Valid Perplexity: %.3f\" % (epoch, valid_ppl))\n        # Test\n        test_ppl = _run_epoch(sess, test_iter, 0)\n        print(\"Test Perplexity: %.3f\" % (test_ppl))\n\n\nif __name__ == '__main__':\n    tf.app.run(main=_main)\n"
  },
  {
    "path": "texar_repo/examples/torchtext/requirements.txt",
    "content": "# also make sure install PyTorch 0.4.0 or newer. \ntorchtext >= 0.2.3"
  },
  {
    "path": "texar_repo/examples/transformer/README.md",
    "content": "# Transformer for Machine Translation #\n\nThis is an implementation of the Transformer model described in [Vaswani, Ashish, et al. \"Attention is all you need.\"](http://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf).\n\n[Quick Start](https://github.com/asyml/texar/tree/master/examples/transformer#quick-start): Prerequisites & use on machine translation datasets\n\n[Run Your Customized Experiments](https://github.com/asyml/texar/tree/master/examples/transformer#run-your-customized-experiments): Hands-on tutorial of data preparation, configuration, and model training/test\n\n## Quick Start ##\n\n### Prerequisites ###\n\nRun the following cmd to install necessary packages for the example: \n```\npip install -r requirements.txt\n```\n\n### Datasets ###\n\nTwo example datasets are provided:\n- IWSLT'15 **EN-VI** for English-Vietnamese translation\n- WMT'14 **EN-DE** for English-German translation\n\nDownload and pre-process the **IWSLT'15 EN-VI** data with the following cmds: \n```\nsh scripts/iwslt15_en_vi.sh \nsh preprocess_data.sh spm en vi\n```\nBy default, the downloaded dataset is in `./data/en_vi`. \nAs with the [official implementation](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py), `spm` (`sentencepiece`) encoding is used to encode the raw text as data pre-processing. The encoded data is by default in `./temp/run_en_vi_spm`. \n\nFor the **WMT'14 EN-DE** data, download and pre-process with:\n```\nsh scripts/wmt14_en_de.sh\nsh preprocess_data.sh bpe en de\n```\n\nBy default, the downloaded dataset is in `./data/en_de`.\nNote that for this dataset, `bpe` encoding (Byte pair encoding) is used instead. The encoded data is by default in `./temp/run_en_de_bpe`. 
\n\n### Train and evaluate the model ###\n\nTrain the model with the cmd:\n```\npython transformer_main.py --run_mode=train_and_evaluate --config_model=config_model --config_data=config_iwslt15\n```\n* Specify `--model_dir` to dump model checkpoints, training logs, and tensorboard summaries to a desired directory. By default it is set to `./outputs`. \n* Specifying `--model_dir` will also restore the latest model checkpoint under the directory, if any checkpoint is there.\n* Specify `--config_data=config_wmt14` to train on the WMT'14 data.\n\n### Test a trained model ###\n\nTo only evaluate a model checkpoint without training, first load the checkpoint and generate samples: \n```\npython transformer_main.py --run_mode=test --config_data=config_iwslt15 --model_dir=./outputs\n```\nThe latest checkpoint in `./outputs` is used. Generated samples are in the file `./outputs/test.output.hyp`, and reference sentences are in the file `./outputs/test.output.ref` \n\nNext, decode the samples with respective decoder, and evaluate with `bleu_tool`:\n```\n../../bin/utils/spm_decode --infile ./outputs/test.output.hyp --outfile temp/test.output.spm --model temp/run_en_vi_spm/data/spm-codes.32000.model --input_format=piece \n\npython bleu_tool.py --reference=data/en_vi/test.vi --translation=temp/test.output.spm\n```\n\nFor WMT'14, the corresponding cmds are:\n```\n# Loads model and generates samples\npython transformer_main.py --run_mode=test --config_data=config_wmt14 --log_dir=./outputs\n\n# BPE decoding\ncat outputs/test.output.hyp | sed -E 's/(@@ )|(@@ ?$)//g' > temp/test.output.bpe\n\n# Evaluates BLEU\npython bleu_tool.py --reference=data/en_de/test.de --translation=temp/test.output.bpe\n```\n\n### Results\n\n* On IWSLT'15, the implementation achieves around `BLEU_cased=28.54` and `BLEU_uncased=29.30` (by [bleu_tool.py](./bleu_tool.py)), which are comparable to the base_single_gpu results by the [official 
implementation](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py) (`28.12` and `28.97`, respectively, as reported [here](https://github.com/tensorflow/tensor2tensor/pull/611)).\n\n* On WMT'14, the implementation achieves around `BLEU_cased=25.12` (setting: base_single_gpu, batch_size=3072).\n\n\n### Example training log\n\n```\n12:02:02,686:INFO:step:500 loss: 7.3735\n12:04:20,035:INFO:step:1000 loss:6.1502\n12:06:37,550:INFO:step:1500 loss:5.4877\n```\nUsing an Nvidia GTX 1080Ti, the model usually converges within 5 hours (~15 epochs) on IWSLT'15.\n\n---\n\n## Run Your Customized Experiments\n\nHere is an hands-on tutorial on running Transformer with your own customized dataset.\n\n### 1. Prepare raw data\n\nCreate a data directory and put the raw data in the directory. To be compatible with the data preprocessing in the next step, you may follow the convention below:\n\n* The data directory should be named as `data/${src}_${tgt}/`. Take the data downloaded with `scripts/iwslt15_en_vi.sh` for example, the data directory is `data/en_vi`.\n* The raw data should have 6 files, which contain source and target sentences of training/dev/test sets, respectively. In the `iwslt15_en_vi` example, `data/en_vi/train.en` contains the source sentences of the training set, where each line is a sentence. Other files are `train.vi`, `dev.en`, `dev.vi`, `test.en`, `test.vi`. \n\n### 2. Preprocess the data\n\nTo obtain the processed dataset, run\n```\npreprocess_data.sh ${encoder} ${src} ${tgt} ${vocab_size} ${max_seq_length}\n```\nwhere\n\n* The `encoder` parameter can be `bpe`(byte pairwise encoding), `spm` (sentence piece encoding), or\n`raw`(no subword encoding).\n* `vocab_size` is optional. The default is 32000. \n  - At this point, this parameter is used only when `encoder` is set to `bpe` or `spm`. 
For `raw` encoding, you'd have to truncate the vocabulary by yourself.\n  - For `spm` encoding, the preprocessing may fail (due to the Python sentencepiece module) if `vocab_size` is too large. So you may want to try smaller `vocab_size` if it happens. \n* `max_seq_length` is optional. The default is 70.\n\nIn the `iwslt15_en_vi` example, the cmd is `sh preprocess_data.sh spm en vi`.\n\nBy default, the preprocessed data are dumped under `temp/run_${src}_${tgt}_${encoder}`. In the `iwslt15_en_vi` example, the directory is `temp/run_en_vi_spm`.\n\nIf you choose to use `raw` encoding method, notice that:\n\n- By default, the word embedding layer is built with the combination of source vocabulary and target vocabulary. For example, if the source vocabulary is of size 3K and the target vocabulary of size 3K and there is no overlap between the two vocabularies, then the final vocabulary used in the model is of size 6K.\n- By default, the final output layer of transformer decoder (hidden_state -> logits) shares the parameters with the word embedding layer.\n\n### 3. Specify data and model configuration\n\nCustomize the Python configuration files to config the model and data.\n\nPlease refer to the example configuration files `config_model.py` for model configuration and `config_iwslt15.py` for data configuration.\n\n### 4. Train the model\n\nTrain the model with the following cmd:\n```\npython transformer_main.py --run_mode=train_and_evaluate --config_model=custom_config_model --config_data=custom_config_data\n```\nwhere the model and data configuration files are `custom_config_model.py` and `custom_config_data.py`, respectively.\n\nOutputs such as model checkpoints are by default under `outputs/`.\n\n### 5. 
Test the model\n\nTest with the following cmd:\n```\npython transformer_main.py --run_mode=test --config_data=custom_config_data --model_dir=./outputs\n```\n\nGenerated samples on the test set are in `outputs/test.output.hyp`, and reference sentences are in `outputs/test.output.ref`. If you've used `bpe` or `spm` encoding in the data preprocessing step, the text in these files are in the respective encoding too. To decode, use the respective cmd:\n```\n# BPE decoding\ncat outputs/test.output.hyp | sed -E 's/(@@ )|(@@ ?$)//g' > temp/test.output.hyp.final\n\n# SPM decoding (take `iwslt15_en_vi` for example)\n../../bin/utils/spm_decode --infile ./outputs/test.output.hyp --outfile temp/test.output.hyp.final --model temp/run_en_vi_spm/data/spm-codes.32000.model --input_format=piece \n```\n\nFinally, to evaluate the BLEU score against the ground truth on the test set:\n```\npython bleu_tool.py --reference=you_reference_file --translation=temp/test.output.hyp.final\n```\nE.g., in the `iwslt15_en_vi` example, with `--reference=data/en_vi/test.vi`\n"
  },
  {
    "path": "texar_repo/examples/transformer/bleu_tool.py",
    "content": "# Copyright 2018 The Tensor2Tensor Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#         http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n# Modifications copyright (C) 2018 Texar\n# ==============================================================================\n\"\"\"BLEU metric utililities used for MT eval.\n\nUsage: python bleu_tool.py --translation=my-wmt13.de --reference=wmt13_deen.de\n\"\"\"\n# This also:\n# Put compounds in ATAT format (comparable to papers like GNMT, ConvS2S).\n# See https://nlp.stanford.edu/projects/nmt/ :\n# 'Also, for historical reasons, we split compound words, e.g.,\n#    \"rich-text format\" --> rich ##AT##-##AT## text format.\"'\n# BLEU score will be similar to the one obtained using: mteval-v14.pl\n# Note:compound splitting is not implemented in this module\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom argparse import ArgumentParser\nfrom io import open\nimport collections\nimport math\nimport re\nimport sys\nimport unicodedata\n\n# Dependency imports\n\nimport numpy as np\nimport six\n# pylint: disable=redefined-builtin\nfrom six.moves import xrange\nfrom six.moves import zip\n\n\n# pylint: enable=redefined-builtin\n\n\ndef _get_ngrams(segment, max_order):\n    \"\"\"Extracts all n-grams upto a given maximum order from an input segment.\n\n  Args:\n    segment: text segment from which n-grams will be extracted.\n    max_order: maximum 
length in tokens of the n-grams returned by this\n        methods.\n\n  Returns:\n    The Counter containing all n-grams upto max_order in segment\n    with a count of how many times each n-gram occurred.\n  \"\"\"\n    ngram_counts = collections.Counter()\n    for order in xrange(1, max_order + 1):\n        for i in xrange(0, len(segment) - order + 1):\n            ngram = tuple(segment[i:i + order])\n            ngram_counts[ngram] += 1\n    return ngram_counts\n\n\ndef compute_bleu(reference_corpus,\n                 translation_corpus,\n                 max_order=4,\n                 use_bp=True):\n    \"\"\"Computes BLEU score of translated segments against references.\n\n    Args:\n        reference_corpus: list of references for each translation. Each\n            reference should be tokenized into a list of tokens.\n        translation_corpus: list of translations to score. Each translation\n            should be tokenized into a list of tokens.\n        max_order: Maximum n-gram order to use when computing BLEU score.\n        use_bp: boolean, whether to apply brevity penalty.\n    Returns:\n        BLEU score.\n    \"\"\"\n\n    reference_length = 0\n    translation_length = 0\n    bp = 1.0\n    geo_mean = 0\n\n    matches_by_order = [0] * max_order\n    possible_matches_by_order = [0] * max_order\n    precisions = []\n\n    for (references, translations) in zip(reference_corpus, translation_corpus):\n        reference_length += len(references)\n        translation_length += len(translations)\n        ref_ngram_counts = _get_ngrams(references, max_order)\n        translation_ngram_counts = _get_ngrams(translations, max_order)\n\n        overlap = dict((ngram,\n                        min(count, translation_ngram_counts[ngram]))\n                       for ngram, count in ref_ngram_counts.items())\n\n        for ngram in overlap:\n            matches_by_order[len(ngram) - 1] += overlap[ngram]\n        for ngram in translation_ngram_counts:\n            
possible_matches_by_order[len(ngram) - 1] += \\\n                translation_ngram_counts[ngram]\n    precisions = [0] * max_order\n    smooth = 1.0\n    for i in xrange(0, max_order):\n        if possible_matches_by_order[i] > 0:\n            precisions[i] = matches_by_order[i] / possible_matches_by_order[i]\n            if matches_by_order[i] > 0:\n                precisions[i] = matches_by_order[i] / \\\n                    possible_matches_by_order[i]\n            else:\n                smooth *= 2\n                precisions[i] = 1.0 / (smooth * possible_matches_by_order[i])\n        else:\n            precisions[i] = 0.0\n\n    if max(precisions) > 0:\n        p_log_sum = sum(math.log(p) for p in precisions if p)\n        geo_mean = math.exp(p_log_sum / max_order)\n\n    if use_bp:\n        ratio = translation_length / reference_length\n        if ratio == 0:\n            bp = 0\n        bp = math.exp(1 - 1. / ratio) if ratio < 1.0 else 1.0\n    bleu = geo_mean * bp\n    return np.float32(bleu)\n\n\nclass UnicodeRegex(object):\n    \"\"\"Ad-hoc hack to recognize all punctuation and symbols.\"\"\"\n    # pylint:disable=too-few-public-methods\n    def __init__(self):\n        punctuation = self.property_chars(\"P\")\n        self.nondigit_punct_re = re.compile(r\"([^\\d])([\" + punctuation + r\"])\")\n        self.punct_nondigit_re = re.compile(r\"([\" + punctuation + r\"])([^\\d])\")\n        self.symbol_re = re.compile(\"([\" + self.property_chars(\"S\") + \"])\")\n\n    def property_chars(self, prefix):\n        #pylint:disable=no-self-use\n        return \"\".join(six.unichr(x) for x in range(sys.maxunicode) \\\n            if unicodedata.category(six.unichr(x)).startswith(prefix))\n\n\nuregex = UnicodeRegex()\n\n\ndef bleu_tokenize(string):\n    r\"\"\"Tokenize a string following the official BLEU implementation.\n\n  See https://github.com/moses-smt/mosesdecoder/\"\n           \"blob/master/scripts/generic/mteval-v14.pl#L954-L983\n  In our case, the input 
string is expected to be just one line\n  and no HTML entities de-escaping is needed.\n  So we just tokenize on punctuation and symbols,\n  except when a punctuation is preceded and followed by a digit\n  (e.g. a comma/dot as a thousand/decimal separator).\n\n  Note that a numer (e.g. a year) followed by a dot at the end of sentence\n  is NOT tokenized,\n  i.e. the dot stays with the number because `s/(\\p{P})(\\P{N})/ $1 $2/g`\n  does not match this case (unless we add a space after each sentence).\n  However, this error is already in the original mteval-v14.pl\n  and we want to be consistent with it.\n\n  Args:\n    string: the input string\n\n  Returns:\n    a list of tokens\n  \"\"\"\n    string = uregex.nondigit_punct_re.sub(r\"\\1 \\2 \", string)\n    string = uregex.punct_nondigit_re.sub(r\" \\1 \\2\", string)\n    string = uregex.symbol_re.sub(r\" \\1 \", string)\n    return string.split()\n\n\ndef bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):\n    \"\"\"Compute BLEU for two files (reference and hypothesis translation).\"\"\"\n    ref_lines = open(ref_filename, encoding='utf-8').read().splitlines()\n    hyp_lines = open(hyp_filename, encoding='utf-8').read().splitlines()\n    assert len(ref_lines) == len(hyp_lines)\n    if not case_sensitive:\n        ref_lines = [x.lower() for x in ref_lines]\n        hyp_lines = [x.lower() for x in hyp_lines]\n    ref_tokens = [bleu_tokenize(x) for x in ref_lines]\n    hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]\n    return compute_bleu(ref_tokens, hyp_tokens)\n\n\nif __name__ == \"__main__\":\n    parser = ArgumentParser(description='Compute BLEU score. 
\\\n        Usage: t2t-bleu --translation=my-wmt13.de --reference=wmt13_deen.de')\n\n    parser.add_argument('--translation', type=str)\n    parser.add_argument('--reference', type=str)\n    args = parser.parse_args()\n\n    bleu = 100 * bleu_wrapper(args.reference,\n                              args.translation,\n                              case_sensitive=False)\n    print(\"BLEU_uncased = %6.2f\" % bleu)\n    bleu = 100 * bleu_wrapper(args.reference,\n                              args.translation,\n                              case_sensitive=True)\n    print(\"BLEU_cased = %6.2f\" % bleu)\n"
  },
  {
    "path": "texar_repo/examples/transformer/config_iwslt15.py",
    "content": "batch_size = 2048\ntest_batch_size = 64\n\nmax_train_epoch = 20\ndisplay_steps = 500\neval_steps = 2000\n\nmax_decoding_length = 256\n\nfilename_prefix = \"processed.\"\ninput_dir = 'temp/run_en_vi_spm/data'\nvocab_file = input_dir + '/processed.vocab.pickle'\n"
  },
  {
    "path": "texar_repo/examples/transformer/config_model.py",
    "content": "\"\"\"Configurations of Transformer model\n\"\"\"\nimport copy\nimport texar as tx\n\nrandom_seed = 1234\nbeam_width = 5\nalpha = 0.6\nhidden_dim = 512\n\nemb = {\n    'name': 'lookup_table',\n    'dim': hidden_dim,\n    'initializer': {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': hidden_dim**-0.5,\n        },\n    }\n}\n\nencoder = {\n    'dim': hidden_dim,\n    'num_blocks': 6,\n    'multihead_attention': {\n        'num_heads': 8,\n        'output_dim': hidden_dim\n        # See documentation for more optional hyperparameters\n    },\n    'position_embedder_hparams': {\n        'dim': hidden_dim\n    },\n    'initializer': {\n        'type': 'variance_scaling_initializer',\n        'kwargs': {\n            'scale': 1.0,\n            'mode': 'fan_avg',\n            'distribution': 'uniform',\n        },\n    },\n    'poswise_feedforward': tx.modules.default_transformer_poswise_net_hparams(\n        output_dim=hidden_dim)\n}\n\ndecoder = copy.deepcopy(encoder)\n\nloss_label_confidence = 0.9\n\nopt = {\n    'optimizer': {\n        'type': 'AdamOptimizer',\n        'kwargs': {\n            'beta1': 0.9,\n            'beta2': 0.997,\n            'epsilon': 1e-9\n        }\n    }\n}\n\nlr = {\n    'learning_rate_schedule': 'constant.linear_warmup.rsqrt_decay.rsqrt_depth',\n    'lr_constant': 2 * (hidden_dim ** -0.5),\n    'static_lr': 1e-3,\n    'warmup_steps': 16000,\n}\n"
  },
  {
    "path": "texar_repo/examples/transformer/config_wmt14.py",
    "content": "batch_size = 3072\ntest_batch_size = 64\n\nmax_train_epoch = 10\ndisplay_steps = 500\neval_steps = 2000\n\nmax_decoding_length= 256\n\nfilename_prefix = \"processed.\"\ninput_dir = 'temp/run_en_de_bpe/data'\nvocab_file = input_dir + '/processed.vocab.pickle'\n"
  },
  {
    "path": "texar_repo/examples/transformer/preprocess_data.sh",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#!/usr/bin/env bash\n\n###########################################################################\n\n# This file provides a script to preprocess raw text corpora to generate\n# vocabulary with sentence piece encoding or byte pairwise encoding.\n#\n# By default, the vocab size is 32000 and maximum sequence length is 70.\n###########################################################################\n\n\nTF=$(pwd)\n\nexport PATH=$PATH:$TF/../../bin/utils/\nencoder=$1\nsrc_language=$2\ntgt_language=$3\nvocab_size=${4:-32000}\nmax_seq_length=${5:-70}\n\n# update these variables\ndata=${TF}\"/data/${src_language}_${tgt_language}\"\nname=\"run_${src_language}_${tgt_language}_${encoder}\"\nout=\"temp/${name}\"\n\ntrain_src=$data/train.${src_language}\ntrain_tgt=$data/train.${tgt_language}\nvalid_src=$data/dev.${src_language}\nvalid_tgt=$data/dev.${tgt_language}\ntest_src=$data/test.${src_language}\ntest_tgt=$data/test.${tgt_language}\n\n#====== EXPERIMENT BEGIN ======\necho \"Output dir = $out\"\n[ -d $out ] || mkdir -p $out\n[ -d $out/data ] || mkdir -p $out/data\n[ -d $out/test ] || mkdir -p  $out/test\n\necho \"Step 1a: Preprocess inputs\"\n\ncase ${encoder} in\n    'spm')\n        echo \"Learning Word Piece on source and target combined\"\n        spm_train --input=${train_src},${train_tgt} --vocab_size ${vocab_size} 
--model_prefix=$out/data/spm-codes.${vocab_size}\n        spm_encode --model $out/data/spm-codes.${vocab_size}.model --output_format=piece --infile $train_src --outfile $out/data/train.${src_language}.spm\n        spm_encode --model $out/data/spm-codes.${vocab_size}.model --output_format=piece --infile $valid_src --outfile $out/data/valid.${src_language}.spm\n        spm_encode --model $out/data/spm-codes.${vocab_size}.model --output_format=piece --infile $test_src --outfile $out/data/test.${src_language}.spm\n        spm_encode --model $out/data/spm-codes.${vocab_size}.model --output_format=piece --infile $train_tgt --outfile $out/data/train.${tgt_language}.spm\n        spm_encode --model $out/data/spm-codes.${vocab_size}.model --output_format=piece --infile $valid_tgt --outfile $out/data/valid.${tgt_language}.spm\n        spm_encode --model $out/data/spm-codes.${vocab_size}.model --output_format=piece --infile ${test_tgt} --outfile $out/data/test.${tgt_language}.spm\n        cp ${test_tgt} ${out}/test/test.${tgt_language} ;;\n    'bpe'):\n        echo \"Learning Byte Pairwise on source and target combined\"\n        cat ${train_src} ${train_tgt} | learn_bpe -s ${vocab_size} > ${out}/data/bpe-codes.${vocab_size}\n        apply_bpe -c ${out}/data/bpe-codes.${vocab_size} < ${train_src} > $out/data/train.${src_language}.bpe\n        apply_bpe -c ${out}/data/bpe-codes.${vocab_size} < ${valid_src} > ${out}/data/valid.${src_language}.bpe\n        apply_bpe -c ${out}/data/bpe-codes.${vocab_size} < ${test_src} > ${out}/data/test.${src_language}.bpe\n        apply_bpe -c ${out}/data/bpe-codes.${vocab_size} < ${train_tgt} > $out/data/train.${tgt_language}.bpe\n        apply_bpe -c ${out}/data/bpe-codes.${vocab_size} < ${valid_tgt} > ${out}/data/valid.${tgt_language}.bpe\n        apply_bpe -c ${out}/data/bpe-codes.${vocab_size} < ${test_tgt} > ${out}/data/test.${tgt_language}.bpe\n        cp ${test_tgt} ${out}/test/test.${tgt_language} ;;\n    'raw'):\n        echo \"No 
subword encoding is applied, just copy the corpus files into correct directory\"\n        cp ${train_src} $out/data/train.${src_language}.raw\n        cp ${valid_src} $out/data/valid.${src_language}.raw\n        cp ${test_src} $out/data/test.${src_language}.raw\n        cp ${train_tgt} $out/data/train.${tgt_language}.raw\n        cp ${valid_tgt} $out/data/valid.${tgt_language}.raw\n        cp ${test_tgt} $out/data/test.${tgt_language}.raw\nesac\n# TODO(zhiting): Truncate vocab when encoder==raw\n\npython ${TF}/utils/preprocess.py -i ${out}/data \\\n    --src ${src_language}.${encoder} \\\n    --tgt ${tgt_language}.${encoder} \\\n    --save_data processed. \\\n    --max_seq_length=${max_seq_length} \\\n    --pre_encoding=${encoder}\n"
  },
  {
    "path": "texar_repo/examples/transformer/requirements.txt",
    "content": "torchtext\ntorch\nsentencepiece\n"
  },
  {
    "path": "texar_repo/examples/transformer/scripts/iwslt15_en_vi.sh",
    "content": "#!/bin/sh\n# Copied from https://github.com/tensorflow/nmt/blob/master/nmt/scripts/download_iwslt15.sh\n#\n# Download small-scale IWSLT15 Vietnames to English translation data for NMT\n# model training.\n#\n# Usage:\n#   ./download_iwslt15.sh path-to-output-dir\n#\n# If output directory is not specified, \"./iwslt15\" will be used as the default\n# output directory.\n\nOUT_DIR=\"${1:-data/en_vi}\"\nSITE_PREFIX=\"https://nlp.stanford.edu/projects/nmt/data\"\n\nmkdir -v -p $OUT_DIR\n\n# Download iwslt15 small dataset from standford website.\necho \"Download training dataset train.en and train.vi.\"\ncurl -o \"$OUT_DIR/train.en\" \"$SITE_PREFIX/iwslt15.en-vi/train.en\"\ncurl -o \"$OUT_DIR/train.vi\" \"$SITE_PREFIX/iwslt15.en-vi/train.vi\"\n\necho \"Download dev dataset tst2012.en and tst2012.vi.\"\ncurl -o \"$OUT_DIR/dev.en\" \"$SITE_PREFIX/iwslt15.en-vi/tst2012.en\"\ncurl -o \"$OUT_DIR/dev.vi\" \"$SITE_PREFIX/iwslt15.en-vi/tst2012.vi\"\n\necho \"Download test dataset tst2013.en and tst2013.vi.\"\ncurl -o \"$OUT_DIR/test.en\" \"$SITE_PREFIX/iwslt15.en-vi/tst2013.en\"\ncurl -o \"$OUT_DIR/test.vi\" \"$SITE_PREFIX/iwslt15.en-vi/tst2013.vi\"\n"
  },
  {
    "path": "texar_repo/examples/transformer/scripts/wmt14_en_de.sh",
    "content": "#!/usr/bin/env bash\n\n# This code was adapted from Tensorflow NMT toolkit on 03/24/2018.\n# URL: https://raw.githubusercontent.com/tensorflow/nmt/master/nmt/scripts/wmt16_en_de.sh\n\n# Copyright 2017 Google Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nset -e\nOUTPUT_DIR=\"data/en_de/\"\nDOWNLOADED_DATA_DIR=\"data/en_de_temp/\"\nOUTPUT_DIR_CACHE=\"${DOWNLOADED_DATA_DIR}/cache\"\necho \"Writing to ${OUTPUT_DIR_CACHE}. To change this, set the OUTPUT_DIR_CACHE environment variable.\"\nmkdir -p $DOWNLOADED_DATA_DIR\nmkdir -p ${OUTPUT_DIR}\nif [ ! -f ${DOWNLOADED_DATA_DIR}/europarl-v7-de-en.tgz ]; then\n    echo \"Downloading Europarl v7. This may take a while...\"\n    curl -o ${DOWNLOADED_DATA_DIR}/europarl-v7-de-en.tgz \\\n        http://www.statmt.org/europarl/v7/de-en.tgz\nelse\n    echo \"${DOWNLOADED_DATA_DIR}/europarl-v7-de-en.tgz already exists.\"\nfi\n\nif [ ! -f ${DOWNLOADED_DATA_DIR}/common-crawl.tgz ]; then\n    echo \"Downloading Common Crawl corpus. This may take a while...\"\n    curl -o ${DOWNLOADED_DATA_DIR}/common-crawl.tgz \\\n    http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz\nelse\n    echo \"${DOWNLOADED_DATA_DIR}/common-crawl.tgz already exists.\"\nfi\n\nif [ ! -f ${DOWNLOADED_DATA_DIR}/nc-v11.tgz ]; then\n    echo \"Downloading News Commentary v11. 
This may take a while...\"\n    curl -o ${DOWNLOADED_DATA_DIR}/nc-v11.tgz \\\n        http://data.statmt.org/wmt16/translation-task/training-parallel-nc-v11.tgz\nelse\n    echo \"${DOWNLOADED_DATA_DIR}/nc-v11.tgz already exists\"\nfi\n\nif [ ! -f ${DOWNLOADED_DATA_DIR}/dev.tgz ]; then\n    echo \"Downloading dev/test sets\"\n    curl -o ${DOWNLOADED_DATA_DIR}/dev.tgz \\\n        http://data.statmt.org/wmt16/translation-task/dev.tgz\nelse\n    echo \"${DOWNLOADED_DATA_DIR}/dev.tgz already exists\"\nfi\n\nif [ ! -f ${DOWNLOADED_DATA_DIR}/test.tgz ]; then\n    curl -o ${DOWNLOADED_DATA_DIR}/test.tgz \\\n        http://data.statmt.org/wmt16/translation-task/test.tgz\nelse\n    echo \"${DOWNLOADED_DATA_DIR}/test.tgz already exists\"\nfi\n\n# Extract everything\necho \"Extracting all files...\"\nif  [ ! -d ${DOWNLOADED_DATA_DIR}/europarl-v7-de-en ]; then\n    mkdir -p \"${DOWNLOADED_DATA_DIR}/europarl-v7-de-en\"\n    tar -xvzf \"${DOWNLOADED_DATA_DIR}/europarl-v7-de-en.tgz\" -C \"${DOWNLOADED_DATA_DIR}/europarl-v7-de-en\"\n    mkdir -p \"${DOWNLOADED_DATA_DIR}/common-crawl\"\n    tar -xvzf \"${DOWNLOADED_DATA_DIR}/common-crawl.tgz\" -C \"${DOWNLOADED_DATA_DIR}/common-crawl\"\n    mkdir -p \"${DOWNLOADED_DATA_DIR}/nc-v11\"\n    tar -xvzf \"${DOWNLOADED_DATA_DIR}/nc-v11.tgz\" -C \"${DOWNLOADED_DATA_DIR}/nc-v11\"\n    mkdir -p \"${DOWNLOADED_DATA_DIR}/dev\"\n    tar -xvzf \"${DOWNLOADED_DATA_DIR}/dev.tgz\" -C \"${DOWNLOADED_DATA_DIR}/dev\"\n    mkdir -p \"${DOWNLOADED_DATA_DIR}/test\"\n    tar -xvzf \"${DOWNLOADED_DATA_DIR}/test.tgz\" -C \"${DOWNLOADED_DATA_DIR}/test\"\nelse\n    echo \"the tar files have been unzipped\"\nfi\n\n# Concatenate Training data\nwc -l ${DOWNLOADED_DATA_DIR}/europarl-v7-de-en/europarl-v7.de-en.en\nwc -l ${DOWNLOADED_DATA_DIR}/common-crawl/commoncrawl.de-en.en\nwc -l ${DOWNLOADED_DATA_DIR}/nc-v11/training-parallel-nc-v11/news-commentary-v11.de-en.en\n\ncat \"${DOWNLOADED_DATA_DIR}/europarl-v7-de-en/europarl-v7.de-en.en\" \\\n  
\"${DOWNLOADED_DATA_DIR}/common-crawl/commoncrawl.de-en.en\" \\\n  \"${DOWNLOADED_DATA_DIR}/nc-v11/training-parallel-nc-v11/news-commentary-v11.de-en.en\" \\\n  > \"${OUTPUT_DIR_CACHE}/train.en\" &&\\\nwc -l \"${OUTPUT_DIR_CACHE}/train.en\"\n\ncat \"${DOWNLOADED_DATA_DIR}/europarl-v7-de-en/europarl-v7.de-en.de\" \\\n  \"${DOWNLOADED_DATA_DIR}/common-crawl/commoncrawl.de-en.de\" \\\n  \"${DOWNLOADED_DATA_DIR}/nc-v11/training-parallel-nc-v11/news-commentary-v11.de-en.de\" \\\n  > \"${OUTPUT_DIR_CACHE}/train.de\" &&\\\nwc -l \"${OUTPUT_DIR_CACHE}/train.de\"\n\n# Clone Moses\nif [ ! -d \"${OUTPUT_DIR_CACHE}/mosesdecoder\" ]; then\n  echo \"Cloning moses for data processing\"\n  git clone https://github.com/moses-smt/mosesdecoder.git \"${OUTPUT_DIR_CACHE}/mosesdecoder\"\nfi\n\n${OUTPUT_DIR_CACHE}/mosesdecoder/scripts/ems/support/input-from-sgm.perl \\\n  < ${DOWNLOADED_DATA_DIR}/dev/dev/newstest2014-deen-src.de.sgm \\\n  > ${DOWNLOADED_DATA_DIR}/dev/dev/newstest2014.de\n${OUTPUT_DIR_CACHE}/mosesdecoder/scripts/ems/support/input-from-sgm.perl \\\n  < ${DOWNLOADED_DATA_DIR}/dev/dev/newstest2014-deen-ref.en.sgm \\\n  > ${DOWNLOADED_DATA_DIR}/dev/dev/newstest2014.en\n\n# Copy dev/test data to output dir\ncp ${DOWNLOADED_DATA_DIR}/dev/dev/newstest20*.de ${OUTPUT_DIR_CACHE}\ncp ${DOWNLOADED_DATA_DIR}/dev/dev/newstest20*.en ${OUTPUT_DIR_CACHE}\n\n# Tokenize data\nfor f in ${OUTPUT_DIR_CACHE}/*.de; do\n  echo \"Tokenizing $f...\"\n  ${OUTPUT_DIR_CACHE}/mosesdecoder/scripts/tokenizer/tokenizer.perl -q -l de -threads 8 < $f > ${f%.*}.tok.de\ndone\n\nfor f in ${OUTPUT_DIR_CACHE}/*.en; do\n  echo \"Tokenizing $f...\"\n  ${OUTPUT_DIR_CACHE}/mosesdecoder/scripts/tokenizer/tokenizer.perl -q -l en -threads 8 < $f > ${f%.*}.tok.en\ndone\n\n# Clean train corpora\nfor f in ${OUTPUT_DIR_CACHE}/train.tok.en; do\n  fbase=${f%.*}\n  echo \"Cleaning ${fbase}...\"\n  ${OUTPUT_DIR_CACHE}/mosesdecoder/scripts/training/clean-corpus-n.perl $fbase de en \"${fbase}.clean\" 1 80\ndone\n\ncp 
${OUTPUT_DIR_CACHE}/train.tok.clean.en ${OUTPUT_DIR}/train.en\ncp ${OUTPUT_DIR_CACHE}/train.tok.clean.de ${OUTPUT_DIR}/train.de\ncp ${OUTPUT_DIR_CACHE}/newstest2013.tok.en ${OUTPUT_DIR}/dev.en\ncp ${OUTPUT_DIR_CACHE}/newstest2013.tok.de ${OUTPUT_DIR}/dev.de\ncp ${OUTPUT_DIR_CACHE}/newstest2014.tok.en ${OUTPUT_DIR}/test.en\ncp ${OUTPUT_DIR_CACHE}/newstest2014.tok.de ${OUTPUT_DIR}/test.de\n"
  },
  {
    "path": "texar_repo/examples/transformer/transformer_main.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Transformer model.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport pickle\nimport random\nimport os\nimport importlib\nfrom torchtext import data\nimport tensorflow as tf\nimport texar as tx\nfrom texar.modules import TransformerEncoder, TransformerDecoder\nfrom texar.utils import transformer_utils\n\nfrom utils import data_utils, utils\nfrom utils.preprocess import bos_token_id, eos_token_id\nfrom bleu_tool import bleu_wrapper\n# pylint: disable=invalid-name, too-many-locals\n\nflags = tf.flags\n\nflags.DEFINE_string(\"config_model\", \"config_model\", \"The model config.\")\nflags.DEFINE_string(\"config_data\", \"config_iwslt15\", \"The dataset config.\")\nflags.DEFINE_string(\"run_mode\", \"train_and_evaluate\",\n                    \"Either train_and_evaluate or test.\")\nflags.DEFINE_string(\"model_dir\", \"./outputs\",\n                    \"Directory to save the trained model and logs.\")\n\nFLAGS = flags.FLAGS\n\nconfig_model = importlib.import_module(FLAGS.config_model)\nconfig_data = importlib.import_module(FLAGS.config_data)\n\nutils.set_random_seed(config_model.random_seed)\n\ndef main():\n    \"\"\"Entrypoint.\n    \"\"\"\n    # Load data\n    train_data, dev_data, test_data = 
data_utils.load_data_numpy(\n        config_data.input_dir, config_data.filename_prefix)\n    with open(config_data.vocab_file, 'rb') as f:\n        id2w = pickle.load(f)\n    vocab_size = len(id2w)\n\n    beam_width = config_model.beam_width\n\n    # Create logging\n    tx.utils.maybe_create_dir(FLAGS.model_dir)\n    logging_file = os.path.join(FLAGS.model_dir, 'logging.txt')\n    logger = utils.get_logger(logging_file)\n    print('logging file is saved in: %s', logging_file)\n\n    # Build model graph\n    encoder_input = tf.placeholder(tf.int64, shape=(None, None))\n    decoder_input = tf.placeholder(tf.int64, shape=(None, None))\n    # (text sequence length excluding padding)\n    encoder_input_length = tf.reduce_sum(\n        1 - tf.to_int32(tf.equal(encoder_input, 0)), axis=1)\n    decoder_input_length = tf.reduce_sum(\n        1 - tf.to_int32(tf.equal(decoder_input, 0)), axis=1)\n\n    labels = tf.placeholder(tf.int64, shape=(None, None))\n    is_target = tf.to_float(tf.not_equal(labels, 0))\n\n    global_step = tf.Variable(0, dtype=tf.int64, trainable=False)\n    learning_rate = tf.placeholder(tf.float64, shape=(), name='lr')\n\n    embedder = tx.modules.WordEmbedder(\n        vocab_size=vocab_size, hparams=config_model.emb)\n    encoder = TransformerEncoder(hparams=config_model.encoder)\n\n    encoder_output = encoder(inputs=embedder(encoder_input),\n                             sequence_length=encoder_input_length)\n\n    # The decoder ties the input word embedding with the output logit layer.\n    # As the decoder masks out <PAD>'s embedding, which in effect means\n    # <PAD> has all-zero embedding, so here we explicitly set <PAD>'s embedding\n    # to all-zero.\n    tgt_embedding = tf.concat(\n        [tf.zeros(shape=[1, embedder.dim]), embedder.embedding[1:, :]], axis=0)\n    decoder = TransformerDecoder(embedding=tgt_embedding,\n                                 hparams=config_model.decoder)\n    # For training\n    outputs = decoder(\n        
memory=encoder_output,\n        memory_sequence_length=encoder_input_length,\n        inputs=embedder(decoder_input),\n        sequence_length=decoder_input_length,\n        decoding_strategy='train_greedy',\n        mode=tf.estimator.ModeKeys.TRAIN\n    )\n\n    mle_loss = transformer_utils.smoothing_cross_entropy(\n        outputs.logits, labels, vocab_size, config_model.loss_label_confidence)\n    mle_loss = tf.reduce_sum(mle_loss * is_target) / tf.reduce_sum(is_target)\n\n    train_op = tx.core.get_train_op(\n        mle_loss,\n        learning_rate=learning_rate,\n        global_step=global_step,\n        hparams=config_model.opt)\n\n    tf.summary.scalar('lr', learning_rate)\n    tf.summary.scalar('mle_loss', mle_loss)\n    summary_merged = tf.summary.merge_all()\n\n    # For inference\n    start_tokens = tf.fill([tx.utils.get_batch_size(encoder_input)],\n                           bos_token_id)\n    predictions = decoder(\n        memory=encoder_output,\n        memory_sequence_length=encoder_input_length,\n        decoding_strategy='infer_greedy',\n        beam_width=beam_width,\n        alpha=config_model.alpha,\n        start_tokens=start_tokens,\n        end_token=eos_token_id,\n        max_decoding_length=config_data.max_decoding_length,\n        mode=tf.estimator.ModeKeys.PREDICT\n    )\n    if beam_width <= 1:\n        inferred_ids = predictions[0].sample_id\n    else:\n        # Uses the best sample by beam search\n        inferred_ids = predictions['sample_id'][:, :, 0]\n\n\n    saver = tf.train.Saver(max_to_keep=5)\n    best_results = {'score': 0, 'epoch': -1}\n\n    def _eval_epoch(sess, epoch, mode):\n        if mode == 'eval':\n            eval_data = dev_data\n        elif mode == 'test':\n            eval_data = test_data\n        else:\n            raise ValueError('`mode` should be either \"eval\" or \"test\".')\n\n        references, hypotheses = [], []\n        bsize = config_data.test_batch_size\n        for i in range(0, len(eval_data), 
bsize):\n            sources, targets = zip(*eval_data[i:i+bsize])\n            x_block = data_utils.source_pad_concat_convert(sources)\n            feed_dict = {\n                encoder_input: x_block,\n                tx.global_mode(): tf.estimator.ModeKeys.EVAL,\n            }\n            fetches = {\n                'inferred_ids': inferred_ids,\n            }\n            fetches_ = sess.run(fetches, feed_dict=feed_dict)\n\n            hypotheses.extend(h.tolist() for h in fetches_['inferred_ids'])\n            references.extend(r.tolist() for r in targets)\n            hypotheses = utils.list_strip_eos(hypotheses, eos_token_id)\n            references = utils.list_strip_eos(references, eos_token_id)\n\n        if mode == 'eval':\n            # Writes results to files to evaluate BLEU\n            # For 'eval' mode, the BLEU is based on token ids (rather than\n            # text tokens) and serves only as a surrogate metric to monitor\n            # the training process\n            fname = os.path.join(FLAGS.model_dir, 'tmp.eval')\n            hypotheses = tx.utils.str_join(hypotheses)\n            references = tx.utils.str_join(references)\n            hyp_fn, ref_fn = tx.utils.write_paired_text(\n                hypotheses, references, fname, mode='s')\n            eval_bleu = bleu_wrapper(ref_fn, hyp_fn, case_sensitive=True)\n            eval_bleu = 100. 
* eval_bleu\n            logger.info('epoch: %d, eval_bleu %.4f', epoch, eval_bleu)\n            print('epoch: %d, eval_bleu %.4f' % (epoch, eval_bleu))\n\n            if eval_bleu > best_results['score']:\n                logger.info('epoch: %d, best bleu: %.4f', epoch, eval_bleu)\n                best_results['score'] = eval_bleu\n                best_results['epoch'] = epoch\n                model_path = os.path.join(FLAGS.model_dir, 'best-model.ckpt')\n                logger.info('saving model to %s', model_path)\n                print('saving model to %s' % model_path)\n                saver.save(sess, model_path)\n\n        elif mode == 'test':\n            # For 'test' mode, together with the cmds in README.md, BLEU\n            # is evaluated based on text tokens, which is the standard metric.\n            fname = os.path.join(FLAGS.model_dir, 'test.output')\n            hwords, rwords = [], []\n            for hyp, ref in zip(hypotheses, references):\n                hwords.append([id2w[y] for y in hyp])\n                rwords.append([id2w[y] for y in ref])\n            hwords = tx.utils.str_join(hwords)\n            rwords = tx.utils.str_join(rwords)\n            hyp_fn, ref_fn = tx.utils.write_paired_text(\n                hwords, rwords, fname, mode='s',\n                src_fname_suffix='hyp', tgt_fname_suffix='ref')\n            logger.info('Test output written to file: %s', hyp_fn)\n            print('Test output written to file: %s' % hyp_fn)\n\n    def _train_epoch(sess, epoch, step, smry_writer):\n        random.shuffle(train_data)\n        train_iter = data.iterator.pool(\n            train_data,\n            config_data.batch_size,\n            key=lambda x: (len(x[0]), len(x[1])),\n            batch_size_fn=utils.batch_size_fn,\n            random_shuffler=data.iterator.RandomShuffler())\n\n        for _, train_batch in enumerate(train_iter):\n            in_arrays = data_utils.seq2seq_pad_concat_convert(train_batch)\n            feed_dict = {\n
               encoder_input: in_arrays[0],\n                decoder_input: in_arrays[1],\n                labels: in_arrays[2],\n                learning_rate: utils.get_lr(step, config_model.lr)\n            }\n            fetches = {\n                'step': global_step,\n                'train_op': train_op,\n                'smry': summary_merged,\n                'loss': mle_loss,\n            }\n\n            fetches_ = sess.run(fetches, feed_dict=feed_dict)\n\n            step, loss = fetches_['step'], fetches_['loss']\n            if step and step % config_data.display_steps == 0:\n                logger.info('step: %d, loss: %.4f', step, loss)\n                print('step: %d, loss: %.4f' % (step, loss))\n                smry_writer.add_summary(fetches_['smry'], global_step=step)\n\n            if step and step % config_data.eval_steps == 0:\n                _eval_epoch(sess, epoch, mode='eval')\n        return step\n\n    # Run the graph\n    with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        smry_writer = tf.summary.FileWriter(FLAGS.model_dir, graph=sess.graph)\n\n        if FLAGS.run_mode == 'train_and_evaluate':\n            logger.info('Begin running with train_and_evaluate mode')\n\n            if tf.train.latest_checkpoint(FLAGS.model_dir) is not None:\n                logger.info('Restore latest checkpoint in %s' % FLAGS.model_dir)\n                saver.restore(sess, tf.train.latest_checkpoint(FLAGS.model_dir))\n\n            step = 0\n            for epoch in range(config_data.max_train_epoch):\n                step = _train_epoch(sess, epoch, step, smry_writer)\n\n        elif FLAGS.run_mode == 'test':\n            logger.info('Begin running with test mode')\n\n            logger.info('Restore latest checkpoint in %s' % FLAGS.model_dir)\n            saver.restore(sess, 
tf.train.latest_checkpoint(FLAGS.model_dir))\n\n            _eval_epoch(sess, 0, mode='test')\n\n        else:\n            raise ValueError('Unknown mode: {}'.format(FLAGS.run_mode))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "texar_repo/examples/transformer/utils/__init__.py",
    "content": ""
  },
  {
    "path": "texar_repo/examples/transformer/utils/data_utils.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Data read/write utilities for Transformer.\n\"\"\"\nimport os\nimport codecs\nimport six\nimport numpy as np\n\n# pylint: disable=no-member\n\ndef load_data_numpy(input_dir, prefix):\n    train_data = np.load(os.path.join(input_dir,\\\n        prefix + 'train.npy'), encoding='latin1').tolist()\n    dev_data = np.load(os.path.join(input_dir,\\\n        prefix + 'valid.npy'), encoding='latin1').tolist()\n    test_data = np.load(os.path.join(input_dir,\\\n        prefix + 'test.npy'), encoding='latin1').tolist()\n    print('train data size:{}'.format(len(train_data)))\n    return train_data, dev_data, test_data\n\ndef seq2seq_pad_concat_convert(xy_batch, eos_id=2, bos_id=1):\n    \"\"\"\n    Args:\n        xy_batch (list of tuple of two numpy.ndarray-s or cupy.ndarray-s):\n            xy_batch[i][0] is an array\n            of token ids of i-th input sentence in a minibatch.\n            xy_batch[i][1] is an array\n            of token ids of i-th target sentence in a minibatch.\n            The shape of each array is `(sentence length, )`.\n        eos_id: The index of end-of-sentence special token in the\n            dictionary.\n\n    Returns:\n        Tuple of Converted array.\n            (input_sent_batch_array, target_sent_batch_input_array,\n            target_sent_batch_output_array).\n            The shape of each array is 
`(batchsize, max_sentence_length)`.\n            All sentences are padded with 0 to reach max_sentence_length.\n    \"\"\"\n\n    x_seqs, y_seqs = zip(*xy_batch)\n    x_block = _concat_examples(x_seqs, padding=0)\n    y_block = _concat_examples(y_seqs, padding=0)\n\n    # Add EOS\n    x_block = np.pad(x_block, ((0, 0), (0, 1)), 'constant',\n                     constant_values=0)\n    for i_batch, seq in enumerate(x_seqs):\n        x_block[i_batch, len(seq)] = eos_id\n\n    y_out_block = np.pad(y_block, ((0, 0), (0, 1)), 'constant',\n                         constant_values=0)\n    for i_batch, seq in enumerate(y_seqs):\n        y_out_block[i_batch, len(seq)] = eos_id\n\n    # Add BOS in target language\n    y_in_block = np.pad(y_block, ((0, 0), (1, 0)), 'constant',\n                        constant_values=bos_id)\n    return x_block, y_in_block, y_out_block\n\ndef source_pad_concat_convert(x_seqs, eos_id=2, bos_id=1):\n    \"\"\"\n    This function is used when testing the model without target input.\n    \"\"\"\n    x_block = _concat_examples(x_seqs, padding=0)\n\n    # add EOS\n    x_block = np.pad(x_block, ((0, 0), (0, 1)), 'constant', constant_values=0)\n    for i_batch, seq in enumerate(x_seqs):\n        x_block[i_batch, len(seq)] = eos_id\n    return x_block\n\ndef _concat_examples(arrays, padding=0):\n    if len(arrays) == 0:\n        raise ValueError('batch is empty')\n\n    first_elem = arrays[0]\n    assert isinstance(first_elem, np.ndarray)\n\n    shape = np.array(arrays[0].shape, dtype=int)\n    for array in arrays[1:]:\n        if np.any(shape != array.shape):\n            np.maximum(shape, array.shape, shape)\n    shape = tuple(np.insert(shape, 0, len(arrays)))\n\n    result = np.full(shape, padding, dtype=arrays[0].dtype)\n    for i in six.moves.range(len(arrays)):\n        src = arrays[i]\n        slices = tuple(slice(dim) for dim in src.shape)\n        result[(i,) + slices] = src\n    return result\n\ndef write_words(words_list, filename):\n    
with codecs.open(filename, 'w+', 'utf-8') as myfile:\n        for words in words_list:\n            myfile.write(' '.join(words) + '\\n')\n\n"
  },
  {
    "path": "texar_repo/examples/transformer/utils/preprocess.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\npreprocessing text data. Generally it's to generate plain text vocab file,\ntruncate sequence by length, generate the preprocessed dataset.\n\"\"\"\nfrom __future__ import unicode_literals\nimport collections\nimport re\nimport json\nimport os\nimport numpy as np\nimport pickle\nimport argparse\nfrom io import open\n#pylint:disable=invalid-name\n\nsplit_pattern = re.compile(r'([.,!?\"\\':;)(])')\ndigit_pattern = re.compile(r'\\d')\n\n# Refer to https://texar.readthedocs.io/en/latest/_modules/texar/data/vocabulary.html#SpecialTokens\n# these tokens will by default have token ids 0, 1, 2, 3 respectively\npad_token_id, bos_token_id, eos_token_id, unk_token_id = 0, 1, 2, 3\n\ndef split_sentence(s, tok=False):\n    \"\"\"split sentence with some segmentation rules.\"\"\"\n    if tok:\n        s = s.lower()\n        s = s.replace('\\u2019', \"'\")\n        s = digit_pattern.sub('0', s)\n    words = []\n    for word in s.split():\n        if tok:\n            words.extend(split_pattern.split(word))\n        else:\n            words.append(word)\n    words = [w for w in words if w]\n    return words\n\n\ndef open_file(path):\n    \"\"\"more robust open function\"\"\"\n    return open(path, encoding='utf-8')\n\ndef read_file(path, tok=False):\n    \"\"\"a generator to generate each line of file.\"\"\"\n    with open_file(path) as f:\n        
for line in f.readlines():\n            words = split_sentence(line.strip(), tok)\n            yield words\n\n\ndef count_words(path, max_vocab_size=40000, tok=False):\n    \"\"\"count all words in the corpus and output a counter\"\"\"\n    counts = collections.Counter()\n    for words in read_file(path, tok):\n        for word in words:\n            counts[word] += 1\n\n    vocab = [word for (word, _) in counts.most_common(max_vocab_size)]\n    return vocab\n\ndef make_array(word_id, words):\n    \"\"\"generate id numpy array from plain text words.\"\"\"\n    ids = [word_id.get(word, unk_token_id) for word in words]\n    return np.array(ids, 'i')\n\ndef make_dataset(path, w2id, tok=False):\n    \"\"\"generate dataset.\"\"\"\n    dataset, npy_dataset = [], []\n    token_count, unknown_count = 0, 0\n    for words in read_file(path, tok):\n        array = make_array(w2id, words)\n        npy_dataset.append(array)\n        dataset.append(words)\n        token_count += array.size\n        unknown_count += (array == unk_token_id).sum()\n    print('# of tokens:{}'.format(token_count))\n    print('# of unknown {} {:.2}'.format(unknown_count,\\\n        100. 
* unknown_count / token_count))\n    return dataset, npy_dataset\n\ndef get_preprocess_args():\n    \"\"\"Data preprocessing options.\"\"\"\n    class Config(): pass\n    config = Config()\n    parser = argparse.ArgumentParser(description='Preprocessing Options')\n    parser.add_argument('--source_vocab', type=int, default=40000,\n                        help='Vocabulary size of source language')\n    parser.add_argument('--target_vocab', type=int, default=40000,\n                        help='Vocabulary size of target language')\n    parser.add_argument('--tok', dest='tok', action='store_true',\n                        help='tokenized and lowercased')\n    parser.set_defaults(tok=False)\n    parser.add_argument('--max_seq_length', type=int, default=70)\n    parser.add_argument('--pre_encoding', type=str, default='spm')\n    parser.add_argument('--src', type=str, default='en')\n    parser.add_argument('--tgt', type=str, default='vi')\n    parser.add_argument('--input_dir', '-i', type=str, \\\n        default='./data/en_vi/data/', help='Input directory')\n    parser.add_argument('--save_data', type=str, default='preprocess', \\\n        help='Output file for the prepared data')\n    parser.parse_args(namespace=config)\n\n    #keep consistent with original implementation\n    #pylint:disable=attribute-defined-outside-init\n    config.input = config.input_dir\n    config.source_train = 'train.' + config.src\n    config.target_train = 'train.' + config.tgt\n    config.source_valid = 'valid.' + config.src\n    config.target_valid = 'valid.' + config.tgt\n    config.source_test = 'test.'+ config.src\n    config.target_test = 'test.' 
+ config.tgt\n    return config\n\nif __name__ == \"__main__\":\n    args = get_preprocess_args()\n\n    print(json.dumps(args.__dict__, indent=4))\n\n    #pylint:disable=no-member\n    # Vocab Construction\n    source_path = os.path.join(args.input_dir, args.source_train)\n    target_path = os.path.join(args.input_dir, args.target_train)\n\n    src_cntr = count_words(source_path, args.source_vocab, args.tok)\n    trg_cntr = count_words(target_path, args.target_vocab, args.tok)\n    all_words = sorted(list(set(src_cntr + trg_cntr)))\n\n    vocab = ['<pad>', '<bos>', '<eos>', '<unk>'] + all_words\n\n    w2id = {word: index for index, word in enumerate(vocab)}\n\n    # Train Dataset\n    source_data, source_npy = make_dataset(source_path, w2id, args.tok)\n    target_data, target_npy = make_dataset(target_path, w2id, args.tok)\n    assert len(source_data) == len(target_data)\n\n    train_data = [(s, t) for s, t in zip(source_data, target_data)\n                  if s and len(s) < args.max_seq_length\n                  and t and len(t) < args.max_seq_length]\n    train_npy = [(s, t) for s, t in zip(source_npy, target_npy)\n                 if len(s) > 0 and len(s) < args.max_seq_length\n                 and len(t) > 0 and len(t) < args.max_seq_length]\n    assert len(train_data) == len(train_npy)\n\n    # Display corpus statistics\n    print(\"Vocab: {} with special tokens\".format(len(vocab)))\n    print('Original training data size: %d' % len(source_data))\n    print('Filtered training data size: %d' % len(train_data))\n\n    # Valid Dataset\n    source_path = os.path.join(args.input_dir, args.source_valid)\n    source_data, source_npy = make_dataset(source_path, w2id, args.tok)\n    target_path = os.path.join(args.input_dir, args.target_valid)\n    target_data, target_npy = make_dataset(target_path, w2id, args.tok)\n    assert len(source_data) == len(target_data)\n\n    valid_data = [(s, t) for s, t in zip(source_data, target_data)\n                  if s and t]\n   
 valid_npy = [(s, t) for s, t in zip(source_npy, target_npy)\n                 if len(s) > 0 and len(t) > 0]\n    assert len(valid_data) == len(valid_npy)\n    print('Original dev data size: %d' % len(source_data))\n    print('Filtered dev data size: %d' % len(valid_data))\n\n    # Test Dataset\n    source_path = os.path.join(args.input_dir, args.source_test)\n    source_data, source_npy = make_dataset(source_path, w2id, args.tok)\n    target_path = os.path.realpath(\n        os.path.join(args.input_dir, args.target_test))\n    target_data, target_npy = make_dataset(target_path, w2id, args.tok)\n    assert len(source_data) == len(target_data)\n    test_data = [(s, t) for s, t in zip(source_data, target_data)\n                 if s and t]\n    test_npy = [(s, t) for s, t in zip(source_npy, target_npy)\n                if len(s)>0 and len(t)>0]\n    print('Original test data size: %d' % len(source_data))\n    print('Filtered test data size: %d' % len(test_data))\n    id2w = {i: w for w, i in w2id.items()}\n    # Save the dataset to numpy files\n    train_src_output = os.path.join(args.input_dir, \\\n        args.save_data + 'train.' + args.src+ '.txt')\n    train_tgt_output = os.path.join(args.input_dir, \\\n        args.save_data + 'train.' + args.tgt + '.txt')\n    dev_src_output = os.path.join(args.input_dir, \\\n        args.save_data + 'dev.' + args.src+ '.txt')\n    dev_tgt_output = os.path.join(args.input_dir, \\\n        args.save_data + 'dev.' + args.tgt+ '.txt')\n    test_src_output = os.path.join(args.input_dir, \\\n        args.save_data + 'test.' + args.src+ '.txt')\n    test_tgt_output = os.path.join(args.input_dir, \\\n        args.save_data + 'test.' 
+ args.tgt + '.txt')\n\n    np.save(os.path.join(args.input, args.save_data + 'train.npy'),\n            train_npy)\n    np.save(os.path.join(args.input, args.save_data + 'valid.npy'),\n            valid_npy)\n    np.save(os.path.join(args.input, args.save_data + 'test.npy'),\n            test_npy)\n    with open(os.path.join(args.input, args.save_data + 'vocab.pickle'), 'wb')\\\n        as f:\n        pickle.dump(id2w, f, protocol=pickle.HIGHEST_PROTOCOL)\n\n    with open(train_src_output, 'w+', encoding='utf-8') as fsrc, \\\n        open(train_tgt_output, 'w+', encoding='utf-8') as ftgt:\n        for words in train_data:\n            fsrc.write('{}\\n'.format(' '.join(words[0])))\n            ftgt.write('{}\\n'.format(' '.join(words[1])))\n    with open(dev_src_output, 'w+', encoding='utf-8') as fsrc, \\\n        open(dev_tgt_output, 'w+', encoding='utf-8') as ftgt:\n        for words in valid_data:\n            fsrc.write('{}\\n'.format(' '.join(words[0])))\n            ftgt.write('{}\\n'.format(' '.join(words[1])))\n    with open(test_src_output, 'w+', encoding='utf-8') as fsrc, \\\n        open(test_tgt_output, 'w+', encoding='utf-8') as ftgt:\n        for words in test_data:\n            fsrc.write('{}\\n'.format(' '.join(words[0])))\n            ftgt.write('{}\\n'.format(' '.join(words[1])))\n    with open(os.path.join(args.input_dir, \\\n            args.save_data + args.pre_encoding + '.vocab.text'), 'w+', encoding='utf-8') as f:\n        max_size = len(id2w)\n        for idx in range(4, max_size):\n            f.write('{}\\n'.format(id2w[idx]))\n"
  },
  {
    "path": "texar_repo/examples/transformer/utils/utils.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nHelper functions for model training.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport random\nimport math\nimport logging\nimport numpy as np\nimport tensorflow as tf\n\ndef set_random_seed(myseed):\n    tf.set_random_seed(myseed)\n    np.random.seed(myseed)\n    random.seed(myseed)\n\ndef batch_size_fn(new, count, size_so_far):\n    max_src_in_batch, max_tgt_in_batch = 0, 0\n    max_src_in_batch = max(max_src_in_batch, len(new[0] + 1))\n    max_tgt_in_batch = max(max_tgt_in_batch, len(new[1] + 1))\n    src_elements = count * max_src_in_batch\n    tgt_elements = count * max_tgt_in_batch\n    return max(src_elements, tgt_elements)\n\ndef get_lr(fstep, opt_config):\n    if opt_config['learning_rate_schedule'] == 'static':\n        lr = opt_config['static_lr']\n    else:\n        lr = opt_config['lr_constant'] \\\n            * min(1.0, (fstep / opt_config['warmup_steps'])) \\\n            * (1 / math.sqrt(max(fstep, opt_config['warmup_steps'])))\n    return lr\n\ndef get_logger(log_path):\n    \"\"\"Returns a logger.\n\n    Args:\n        log_path (str): Path to the log file.\n    \"\"\"\n    logger = logging.getLogger(__name__)\n    logger.setLevel(logging.DEBUG)\n    fh = logging.FileHandler(log_path)\n    
fh.setLevel(logging.DEBUG)\n    fh.setFormatter(\n        logging.Formatter('%(asctime)s:%(levelname)s:%(message)s'))\n    logger.addHandler(fh)\n    return logger\n\ndef list_strip_eos(list_, eos_token):\n    \"\"\"Strips EOS token from a list of lists of tokens.\n    \"\"\"\n    list_strip = []\n    for elem in list_:\n        if eos_token in elem:\n            elem = elem[:elem.index(eos_token)]\n        list_strip.append(elem)\n    return list_strip\n\n"
  },
  {
    "path": "texar_repo/examples/vae_text/README.md",
    "content": "# Variational Autoencoder (VAE) for Text Generation\n\nThis example builds a VAE for text generation, with an LSTM as encoder and an LSTM or [Transformer](https://arxiv.org/pdf/1706.03762.pdf) as decoder. Training is performed on the official PTB data and Yahoo data, respectively. \n\nThe VAE with LSTM decoder is first described in [(Bowman et al., 2015) Generating Sentences from a Continuous Space](https://arxiv.org/pdf/1511.06349.pdf)\n\nThe Yahoo dataset is from [(Yang et al., 2017) Improved Variational Autoencoders for Text Modeling using Dilated Convolutions](https://arxiv.org/pdf/1702.08139.pdf), which is created by sampling 100k documents from the original Yahoo Answer data. The average document length is 78 and the vocab size is 200k. \n\n## Data\nThe datasets can be downloaded by running:\n```shell\npython prepare_data.py --data ptb\npython prepare_data.py --data yahoo\n```\n\n## Training\nTrain with the following command:\n\n```shell\npython vae_train.py --config config_trans_ptb\n```\n\nHere:\n\n* `--config` specifies the config file to use, including model hyperparameters and data paths. We provide 4 config files:\n  - [config_lstm_ptb.py](./config_lstm_ptb.py): LSTM decoder, on the PTB data\n  - [config_lstm_yahoo.py](./config_lstm_yahoo.py): LSTM decoder, on the Yahoo data\n  - [config_trans_ptb.py](./config_trans_ptb.py): Transformer decoder, on the PTB data\n  - [config_trans_yahoo.py](./config_trans_yahoo.py): Transformer decoder, on the Yahoo data\n\n## Generation\nGenerating sentences with pre-trained model can be performed with the following command:\n```shell\npython vae_train.py --config config_file --mode predict --model /path/to/model.ckpt --out /path/to/output\n```\n\nHere `--model` specifies the saved model checkpoint, which is saved in `./models/dataset_name/` at training time. For example, the model path is `./models/ptb/ptb_lstmDecoder.ckpt` when generating with a LSTM decoder trained on PTB dataset. 
Generated sentences will be written to standard output if `--out` is not specifcied.\n\n## Results\n\n### Language Modeling\n\n|Dataset    |Metrics   | VAE-LSTM |VAE-Transformer |\n|---------------|-------------|----------------|------------------------|\n|Yahoo | Test PPL<br>Test NLL | 68.11<br>337.13 |59.95<br>326.93|\n|PTB | Test PPL<br>Test NLL | 104.61<br>101.92 | 103.68<br>101.72 |\n\n### Generated Examples\nWe show the generated examples with transformer as decoder trained  on PTB training data.\n\n|Examples|\n|:---------|\n|i 'm always looking at a level of \\$ N to \\$ N billion \\<EOS\\> |\n|after four years ago president bush has federal regulators decided to file financing for the waiver\\<EOS\\> |\n|the savings & loan association said total asset revenue was about \\$ N billion compared with \\$ N billion \\<EOS\\> |\n|the trend would seem to be effective \\<EOS\\> |\n|chicago city 's <unk> computer bank of britain posted a N N jump in third-quarter net income \\<EOS\\>|\n"
  },
  {
    "path": "texar_repo/examples/vae_text/config_lstm_ptb.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n\"\"\"VAE config.\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\ndataset = \"ptb\"\nnum_epochs = 100\nhidden_size = 256\ndec_dropout_in = 0.5\ndec_dropout_out = 0.5\nenc_dropout_in = 0.\nenc_dropout_out = 0.\nword_keep_prob = 0.5\nbatch_size = 32\nembed_dim = 256\n\nlatent_dims = 32\n\nlr_decay_hparams = {\n    \"init_lr\": 0.001,\n    \"threshold\": 2,\n    \"decay_factor\": 0.5,\n    \"max_decay\": 5\n}\n\n\ndecoder_hparams = {\n    \"type\": \"lstm\"\n}\n\nenc_cell_hparams = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": 1. - enc_dropout_out},\n    \"num_layers\": 1\n}\n\ndec_cell_hparams = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": 1. 
- dec_dropout_out},\n    \"num_layers\": 1\n}\n\nenc_emb_hparams = {\n    'name': 'lookup_table',\n    \"dim\": embed_dim,\n    \"dropout_rate\": enc_dropout_in,\n    'initializer' : {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': embed_dim**-0.5,\n        },\n    }\n}\n\ndec_emb_hparams = {\n    'name': 'lookup_table',\n    \"dim\": embed_dim,\n    \"dropout_rate\": dec_dropout_in,\n    'initializer' : {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': embed_dim**-0.5,\n        },\n    }\n}\n\n# KL annealing\nkl_anneal_hparams={\n    \"warm_up\": 10,\n    \"start\": 0.1\n}\n\ntrain_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": './simple-examples/data/ptb.train.txt',\n        \"vocab_file\": './simple-examples/data/vocab.txt'\n    }\n}\n\nval_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": './simple-examples/data/ptb.valid.txt',\n        \"vocab_file\": './simple-examples/data/vocab.txt'\n    }\n}\n\ntest_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"dataset\": {\n        \"files\": './simple-examples/data/ptb.test.txt',\n        \"vocab_file\": './simple-examples/data/vocab.txt'\n    }\n}\n\nopt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            \"learning_rate\": 0.001\n        }\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 5.}\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/vae_text/config_lstm_yahoo.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n\"\"\"VAE config.\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\ndataset = \"yahoo\"\nnum_epochs = 100\nhidden_size = 550\ndec_dropout_in = 0.5\ndec_dropout_out = 0.5\nenc_dropout_in = 0.\nenc_dropout_out = 0.\nbatch_size = 32\nembed_dim = 512\n\nlatent_dims = 32\n\nlr_decay_hparams = {\n    \"init_lr\": 0.001,\n    \"threshold\": 2,\n    \"decay_factor\": 0.5,\n    \"max_decay\": 5\n}\n\n\nrelu_dropout = 0.2\nembedding_dropout = 0.2\nattention_dropout = 0.2\nresidual_dropout = 0.2\nnum_blocks = 3\n\ndecoder_hparams = {\n    \"type\": \"lstm\"\n}\n\nenc_cell_hparams = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": 1. - enc_dropout_out},\n    \"num_layers\": 1\n}\n\ndec_cell_hparams = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": 1. 
- dec_dropout_out},\n    \"num_layers\": 1\n}\n\nenc_emb_hparams = {\n    'name': 'lookup_table',\n    \"dim\": embed_dim,\n    \"dropout_rate\": enc_dropout_in,\n    'initializer' : {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': embed_dim**-0.5,\n        },\n    }\n}\n\ndec_emb_hparams = {\n    'name': 'lookup_table',\n    \"dim\": embed_dim,\n    \"dropout_rate\": dec_dropout_in,\n    'initializer' : {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': embed_dim**-0.5,\n        },\n    }\n}\n\n\n# KL annealing\n# kl_weight = 1.0 / (1 + np.exp(-k*(step-x0)))\nkl_anneal_hparams={\n    \"warm_up\": 10,\n    \"start\": 0.1\n}\n\ntrain_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": './data/yahoo/yahoo.train.txt',\n        \"vocab_file\": './data/yahoo/vocab.txt'\n    }\n}\n\nval_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": './data/yahoo/yahoo.valid.txt',\n        \"vocab_file\": './data/yahoo/vocab.txt'\n    }\n}\n\ntest_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"dataset\": {\n        \"files\": './data/yahoo/yahoo.test.txt',\n        \"vocab_file\": './data/yahoo/vocab.txt'\n    }\n}\n\nopt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            \"learning_rate\": 0.001\n        }\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 5.}\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/vae_text/config_trans_ptb.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n\"\"\"VAE config.\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\ndataset = \"ptb\"\nnum_epochs = 100\nhidden_size = 256\ndec_dropout_in = 0.\nenc_dropout_in = 0.\nenc_dropout_out = 0.\nbatch_size = 32\nembed_dim = 256\n\nlatent_dims = 32\n\nlr_decay_hparams = {\n    \"init_lr\": 0.001,\n    \"threshold\": 2,\n    \"decay_factor\": 0.5,\n    \"max_decay\": 5\n}\n\n\nrelu_dropout = 0.2\nembedding_dropout = 0.2\nattention_dropout = 0.2\nresidual_dropout = 0.2\nnum_blocks = 3\n\ndecoder_hparams = {\n    \"type\": \"transformer\"\n}\n\nenc_cell_hparams = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": 1. 
- enc_dropout_out},\n    \"num_layers\": 1\n}\n\nenc_emb_hparams = {\n    'name': 'lookup_table',\n    \"dim\": embed_dim,\n    \"dropout_rate\": enc_dropout_in,\n    'initializer' : {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': embed_dim**-0.5,\n        },\n    }\n}\n\ndec_emb_hparams = {\n    'name': 'lookup_table',\n    \"dim\": embed_dim,\n    \"dropout_rate\": dec_dropout_in,\n    'initializer' : {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': embed_dim**-0.5,\n        },\n    }\n}\n\n# due to the residual connection, the embed_dim should be equal to hidden_size\ntrans_hparams = {\n    'output_layer_bias': False,\n    'embedding_dropout': embedding_dropout,\n    'residual_dropout': residual_dropout,\n    'num_blocks': num_blocks,\n    'dim': hidden_size,\n    'position_embedder_hparams': {\n        'dim': hidden_size,\n    },\n    'initializer': {\n        'type': 'variance_scaling_initializer',\n        'kwargs': {\n            'scale': 1.0,\n            'mode': 'fan_avg',\n            'distribution': 'uniform',\n        },\n    },\n    'multihead_attention': {\n        'dropout_rate': attention_dropout,\n        'num_heads': 8,\n        'num_units': hidden_size,\n        'output_dim': hidden_size\n    },\n    'poswise_feedforward': {\n        'name': 'fnn',\n        'layers': [\n            {\n                'type': 'Dense',\n                'kwargs': {\n                    'name': 'conv1',\n                    'units': hidden_size*4,\n                    'activation': 'relu',\n                    'use_bias': True,\n                },\n            },\n            {\n                'type': 'Dropout',\n                'kwargs': {\n                    'rate': relu_dropout,\n                }\n            },\n            {\n                'type': 'Dense',\n                'kwargs': {\n                    'name': 
'conv2',\n                    'units': hidden_size,\n                    'use_bias': True,\n                    }\n            }\n        ],\n    }\n}\n\n# KL annealing\nkl_anneal_hparams = {\n    \"warm_up\": 10,\n    \"start\": 0.1\n}\n\ntrain_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": './simple-examples/data/ptb.train.txt',\n        \"vocab_file\": './simple-examples/data/vocab.txt'\n    }\n}\n\nval_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": './simple-examples/data/ptb.valid.txt',\n        \"vocab_file\": './simple-examples/data/vocab.txt'\n    }\n}\n\ntest_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"dataset\": {\n        \"files\": './simple-examples/data/ptb.test.txt',\n        \"vocab_file\": './simple-examples/data/vocab.txt'\n    }\n}\n\nopt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            \"learning_rate\": 0.001\n        }\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 5.}\n    }\n}\n"
  },
  {
    "path": "texar_repo/examples/vae_text/config_trans_yahoo.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n\"\"\"VAE config.\n\"\"\"\n\n# pylint: disable=invalid-name, too-few-public-methods, missing-docstring\n\ndataset = \"yahoo\"\nnum_epochs = 100\nhidden_size = 512\ndec_dropout_in = 0.\nenc_dropout_in = 0.\nenc_dropout_out = 0.\nbatch_size = 32\nembed_dim = 512\n\nlatent_dims = 32\n\nlr_decay_hparams = {\n    \"init_lr\": 0.001,\n    \"threshold\": 2,\n    \"decay_factor\": 0.5,\n    \"max_decay\": 5\n}\n\n\nrelu_dropout = 0.2\nembedding_dropout = 0.2\nattention_dropout = 0.2\nresidual_dropout = 0.2\nnum_blocks = 3\n\ndecoder_hparams = {\n    \"type\": \"transformer\"\n}\n\nenc_cell_hparams = {\n    \"type\": \"LSTMBlockCell\",\n    \"kwargs\": {\n        \"num_units\": hidden_size,\n        \"forget_bias\": 0.\n    },\n    \"dropout\": {\"output_keep_prob\": 1. 
- enc_dropout_out},\n    \"num_layers\": 1\n}\n\nenc_emb_hparams = {\n    'name': 'lookup_table',\n    \"dim\": embed_dim,\n    \"dropout_rate\": enc_dropout_in,\n    'initializer' : {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': embed_dim**-0.5,\n        },\n    }\n}\n\ndec_emb_hparams = {\n    'name': 'lookup_table',\n    \"dim\": embed_dim,\n    \"dropout_rate\": dec_dropout_in,\n    'initializer' : {\n        'type': 'random_normal_initializer',\n        'kwargs': {\n            'mean': 0.0,\n            'stddev': embed_dim**-0.5,\n        },\n    }\n}\n\n# due to the residual connection, the embed_dim should be equal to hidden_size\ntrans_hparams = {\n    'output_layer_bias': False,\n    'embedding_dropout': embedding_dropout,\n    'residual_dropout': residual_dropout,\n    'num_blocks': num_blocks,\n    'dim': hidden_size,\n    'initializer': {\n        'type': 'variance_scaling_initializer',\n        'kwargs': {\n            'scale': 1.0,\n            'mode':'fan_avg',\n            'distribution':'uniform',\n        },\n    },\n    'multihead_attention': {\n        'dropout_rate': attention_dropout,\n        'num_heads': 8,\n        'num_units': hidden_size,\n        'output_dim': hidden_size\n    },\n    'poswise_feedforward': {\n        'name':'fnn',\n        'layers':[\n            {\n                'type':'Dense',\n                'kwargs': {\n                    'name':'conv1',\n                    'units':hidden_size*4,\n                    'activation':'relu',\n                    'use_bias':True,\n                },\n            },\n            {\n                'type':'Dropout',\n                'kwargs': {\n                    'rate': relu_dropout,\n                }\n            },\n            {\n                'type':'Dense',\n                'kwargs': {\n                    'name':'conv2',\n                    'units':hidden_size,\n                    'use_bias':True,\n   
                 }\n            }\n        ],\n    }\n}\n\n# KL annealing\nkl_anneal_hparams={\n    \"warm_up\": 10,\n    \"start\": 0.1\n}\n\ntrain_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": './data/yahoo/yahoo.train.txt',\n        \"vocab_file\": './data/yahoo/vocab.txt'\n    }\n}\n\nval_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"seed\": 123,\n    \"dataset\": {\n        \"files\": './data/yahoo/yahoo.valid.txt',\n        \"vocab_file\": './data/yahoo/vocab.txt'\n    }\n}\n\ntest_data_hparams = {\n    \"num_epochs\": 1,\n    \"batch_size\": batch_size,\n    \"dataset\": {\n        \"files\": './data/yahoo/yahoo.test.txt',\n        \"vocab_file\": './data/yahoo/vocab.txt'\n    }\n}\n\nopt_hparams = {\n    \"optimizer\": {\n        \"type\": \"AdamOptimizer\",\n        \"kwargs\": {\n            \"learning_rate\": 0.001\n        }\n    },\n    \"gradient_clip\": {\n        \"type\": \"clip_by_global_norm\",\n        \"kwargs\": {\"clip_norm\": 5.}\n    }\n}\n\n"
  },
  {
    "path": "texar_repo/examples/vae_text/prepare_data.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Utilities for downloading and preprocessing the PTB and Yohoo data.\n\"\"\"\nimport os\nimport argparse\n\nimport tensorflow as tf\nimport texar as tx\n\ndef prepare_data(data_name):\n    \"\"\"Prepare datasets.\n    Args:\n        data_path: the path to save the data\n        data_name: the name of dataset, \"ptb\" and \"yahoo\"\n            are currently supported\n    \"\"\"\n    if data_name == \"ptb\":\n        data_path = \"./simple-examples/data\"\n        train_path = os.path.join(data_path, \"ptb.train.txt\")\n        if not tf.gfile.Exists(train_path):\n            url = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'\n            tx.data.maybe_download(url, './', extract=True)\n\n        train_path = os.path.join(data_path, \"ptb.train.txt\")\n        vocab_path = os.path.join(data_path, \"vocab.txt\")\n        word_to_id = tx.data.make_vocab(\n            train_path, return_type=\"dict\")\n\n        with open(vocab_path, 'w') as fvocab:\n            for word in word_to_id:\n                fvocab.write(\"%s\\n\" % word)\n\n    elif data_name == \"yahoo\":\n        data_path = \"./data/yahoo\"\n        train_path = os.path.join(data_path, \"yahoo.train.txt\")\n        if not tf.gfile.Exists(train_path):\n            url = 'https://drive.google.com/file/d/'\\\n                  
'13IsiffVjcQ-wrrbBGMwiG3sYf-DFxtXH/view?usp=sharing'\n            tx.data.maybe_download(url, path='./', filenames='yahoo.zip',\n                                   extract=True)\n    else:\n        raise ValueError('Unknown data: {}'.format(data_name))\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser(description='prepare data')\n    parser.add_argument('--data', type=str, help='dataset to prepare')\n    args = parser.parse_args()\n    prepare_data(args.data)\n"
  },
  {
    "path": "texar_repo/examples/vae_text/vae_train.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Example for building the Variational Autoencoder.\n\nThis is an impmentation of Variational Autoencoder for text generation\n\nTo run:\n\n$ python vae_train.py\n\nHyperparameters and data path may be specified in config_trans.py\n\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\n# pylint: disable=invalid-name, no-member, too-many-locals\n# pylint: disable=too-many-branches, too-many-statements, redefined-variable-type\n\nimport os\nimport sys\nimport time\nimport importlib\nfrom io import open\n\nimport numpy as np\nimport tensorflow as tf\nimport tensorflow_probability as tfp\nimport texar as tx\n\n\ntfd = tfp.distributions\n\nflags = tf.flags\n\nflags.DEFINE_string(\"config\", \"config\", \"The config to use.\")\nflags.DEFINE_string(\"mode\", \"train\", \"train or predict\")\nflags.DEFINE_string(\"model\", None, \"model path for generating sentences\")\nflags.DEFINE_string(\"out\", None, \"generation output path\")\n\nFLAGS = flags.FLAGS\n\nconfig = importlib.import_module(FLAGS.config)\n\ndef kl_dvg(means, logvars):\n    \"\"\"compute the KL divergence between Gaussian distribution\n    \"\"\"\n    kl_cost = -0.5 * (logvars - tf.square(means) -\n                      tf.exp(logvars) + 1.0)\n    kl_cost = 
tf.reduce_mean(kl_cost, 0)\n\n    return tf.reduce_sum(kl_cost)\n\n\ndef _main(_):\n    # Data\n    train_data = tx.data.MonoTextData(config.train_data_hparams)\n    val_data = tx.data.MonoTextData(config.val_data_hparams)\n    test_data = tx.data.MonoTextData(config.test_data_hparams)\n    iterator = tx.data.TrainTestDataIterator(train=train_data,\n                                             val=val_data,\n                                             test=test_data)\n    data_batch = iterator.get_next()\n\n    opt_vars = {\n        'learning_rate': config.lr_decay_hparams[\"init_lr\"],\n        'best_valid_nll': 1e100,\n        'steps_not_improved': 0,\n        'kl_weight': config.kl_anneal_hparams[\"start\"]\n    }\n\n    decay_cnt = 0\n    max_decay = config.lr_decay_hparams[\"max_decay\"]\n    decay_factor = config.lr_decay_hparams[\"decay_factor\"]\n    decay_ts = config.lr_decay_hparams[\"threshold\"]\n\n    save_dir = \"./models/%s\" % config.dataset\n\n    if not os.path.exists(save_dir):\n        os.makedirs(save_dir)\n\n    suffix = \"%s_%sDecoder.ckpt\" % \\\n            (config.dataset, config.decoder_hparams[\"type\"])\n\n    save_path = os.path.join(save_dir, suffix)\n\n    # KL term annealing rate\n    anneal_r = 1.0 / (config.kl_anneal_hparams[\"warm_up\"] * \\\n        (train_data.dataset_size() / config.batch_size))\n\n    # Model architecture\n    encoder_embedder = tx.modules.WordEmbedder(\n            vocab_size=train_data.vocab.size, hparams=config.enc_emb_hparams)\n    decoder_embedder = tx.modules.WordEmbedder(\n            vocab_size=train_data.vocab.size, hparams=config.dec_emb_hparams)\n\n\n    input_embed = encoder_embedder(data_batch[\"text_ids\"])\n    output_embed = decoder_embedder(data_batch[\"text_ids\"][:, :-1])\n\n    encoder = tx.modules.UnidirectionalRNNEncoder(\n        hparams={\"rnn_cell\": config.enc_cell_hparams})\n\n    if config.decoder_hparams[\"type\"] == \"lstm\":\n        decoder = tx.modules.BasicRNNDecoder(\n      
      vocab_size=train_data.vocab.size,\n            hparams={\"rnn_cell\": config.dec_cell_hparams})\n        decoder_initial_state_size = decoder.cell.state_size\n    elif config.decoder_hparams[\"type\"] == 'transformer':\n        decoder = tx.modules.TransformerDecoder(\n            embedding=decoder_embedder.embedding,\n            hparams=config.trans_hparams)\n        decoder_initial_state_size = tf.TensorShape(\n            [1, config.dec_emb_hparams[\"dim\"]])\n    else:\n        raise NotImplementedError\n\n    connector_mlp = tx.modules.MLPTransformConnector(\n        config.latent_dims * 2)\n\n    connector_stoch = tx.modules.ReparameterizedStochasticConnector(\n        decoder_initial_state_size)\n\n\n    _, ecdr_states = encoder(\n        input_embed,\n        sequence_length=data_batch[\"length\"])\n\n    mean_logvar = connector_mlp(ecdr_states)\n    mean, logvar = tf.split(mean_logvar, 2, 1)\n    kl_loss = kl_dvg(mean, logvar)\n\n    dst = tfd.MultivariateNormalDiag(\n        loc=mean,\n        scale_diag=tf.exp(0.5 * logvar))\n\n    dcdr_states, latent_z = connector_stoch(dst)\n\n    # decoder\n    if config.decoder_hparams[\"type\"] == \"lstm\":\n        # concat latent variable to input at every time step\n        latent_z = tf.expand_dims(latent_z, axis=1)\n        latent_z = tf.tile(latent_z, [1, tf.shape(output_embed)[1], 1])\n        output_embed = tf.concat([output_embed, latent_z], axis=2)\n\n        outputs, _, _ = decoder(\n            initial_state=dcdr_states,\n            decoding_strategy=\"train_greedy\",\n            inputs=output_embed,\n            sequence_length=data_batch[\"length\"]-1)\n    else:\n        outputs = decoder(\n            inputs=output_embed,\n            memory=dcdr_states,\n            memory_sequence_length=tf.ones(tf.shape(dcdr_states)[0]))\n\n    logits = outputs.logits\n\n    seq_lengths = data_batch[\"length\"] - 1\n    # Losses & train ops\n    rc_loss = tx.losses.sequence_sparse_softmax_cross_entropy(\n 
       labels=data_batch[\"text_ids\"][:, 1:],\n        logits=logits,\n        sequence_length=data_batch[\"length\"]-1)\n\n    # KL annealing\n    kl_weight = tf.placeholder(tf.float32, shape=())\n\n    nll = rc_loss + kl_weight * kl_loss\n\n    learning_rate = tf.placeholder(dtype=tf.float32, shape=(),\n                                   name='learning_rate')\n    train_op = tx.core.get_train_op(nll, learning_rate=learning_rate,\n                                    hparams=config.opt_hparams)\n\n    def _run_epoch(sess, epoch, mode_string, display=10):\n        if mode_string == 'train':\n            iterator.switch_to_train_data(sess)\n        elif mode_string == 'valid':\n            iterator.switch_to_val_data(sess)\n        elif mode_string == 'test':\n            iterator.switch_to_test_data(sess)\n\n        step = 0\n        start_time = time.time()\n        num_words = num_sents = 0\n        nll_ = 0.\n        kl_loss_ = rc_loss_ = 0.\n\n        while True:\n            try:\n                fetches = {\"nll\": nll,\n                           \"kl_loss\": kl_loss,\n                           \"rc_loss\": rc_loss,\n                           \"lengths\": seq_lengths}\n\n                if mode_string == 'train':\n                    fetches[\"train_op\"] = train_op\n                    opt_vars[\"kl_weight\"] = min(\n                        1.0, opt_vars[\"kl_weight\"] + anneal_r)\n\n                    kl_weight_ = opt_vars[\"kl_weight\"]\n                else:\n                    kl_weight_ = 1.0\n\n                mode = (tf.estimator.ModeKeys.TRAIN if mode_string == 'train'\n                        else tf.estimator.ModeKeys.EVAL)\n\n                feed = {tx.global_mode(): mode,\n                        kl_weight: kl_weight_,\n                        learning_rate: opt_vars[\"learning_rate\"]}\n\n                fetches_ = sess.run(fetches, feed_dict=feed)\n\n                batch_size = len(fetches_[\"lengths\"])\n                num_sents += 
batch_size\n\n                num_words += sum(fetches_[\"lengths\"])\n                nll_ += fetches_[\"nll\"] * batch_size\n                kl_loss_ += fetches_[\"kl_loss\"] * batch_size\n                rc_loss_ += fetches_[\"rc_loss\"] * batch_size\n\n                if step % display == 0 and mode_string == 'train':\n                    print('%s: epoch %d, step %d, nll %.4f, klw: %.4f, ' \\\n                           'KL %.4f,  rc %.4f, log_ppl %.4f, ppl %.4f, ' \\\n                           'time elapsed: %.1fs' % \\\n                          (mode_string, epoch, step, nll_ / num_sents,\n                           opt_vars[\"kl_weight\"], kl_loss_ / num_sents,\n                           rc_loss_ / num_sents, nll_ / num_words,\n                           np.exp(nll_ / num_words), time.time() - start_time))\n\n                    sys.stdout.flush()\n\n                step += 1\n\n            except tf.errors.OutOfRangeError:\n                print('\\n%s: epoch %d, nll %.4f, KL %.4f, rc %.4f, ' \\\n                      'log_ppl %.4f, ppl %.4f\\n' %\n                      (mode_string, epoch, nll_ / num_sents,\n                       kl_loss_ / num_sents, rc_loss_ / num_sents,\n                       nll_ / num_words, np.exp(nll_ / num_words)))\n                break\n\n        return nll_ / num_sents, np.exp(nll_ / num_words)\n\n    def generate(sess, saver, fname=None):\n        if tf.train.checkpoint_exists(FLAGS.model):\n            saver.restore(sess, FLAGS.model)\n        else:\n            raise ValueError(\"cannot find checkpoint model\")\n\n        batch_size = train_data.batch_size\n\n        dst = tfd.MultivariateNormalDiag(\n            loc=tf.zeros([batch_size, config.latent_dims]),\n            scale_diag=tf.ones([batch_size, config.latent_dims]))\n\n        dcdr_states, latent_z = connector_stoch(dst)\n\n        # to concatenate latent variable to input word embeddings\n        def _cat_embedder(ids):\n            embedding = 
decoder_embedder(ids)\n            return tf.concat([embedding, latent_z], axis=1)\n\n        vocab = train_data.vocab\n        start_tokens = tf.ones(batch_size, tf.int32) * vocab.bos_token_id;\n        end_token = vocab.eos_token_id;\n\n        if config.decoder_hparams[\"type\"] == \"lstm\":\n            outputs, _, _ = decoder(\n                initial_state=dcdr_states,\n                decoding_strategy=\"infer_sample\",\n                embedding=_cat_embedder,\n                max_decoding_length=100,\n                start_tokens=start_tokens,\n                end_token=end_token)\n        else:\n            outputs, _ = decoder(\n                memory=dcdr_states,\n                decoding_strategy=\"infer_sample\",\n                memory_sequence_length=tf.ones(tf.shape(dcdr_states)[0]),\n                max_decoding_length=100,\n                start_tokens=start_tokens,\n                end_token=end_token)\n\n        sample_tokens = vocab.map_ids_to_tokens(outputs.sample_id)\n        sess.run(tf.tables_initializer())\n\n        mode_key = tf.estimator.ModeKeys.EVAL\n        feed = {tx.global_mode():mode_key}\n        sample_tokens_ = sess.run(sample_tokens, feed_dict=feed)\n        if fname is None:\n            fh = sys.stdout\n        else:\n            fh = open(fname, 'w', encoding='utf-8')\n\n        for sent in sample_tokens_:\n            sent = list(sent)\n            end_id = sent.index(vocab.eos_token)\n            fh.write(' '.join(sent[:end_id+1]) + '\\n')\n\n        fh.close()\n\n    saver = tf.train.Saver()\n    with tf.Session() as sess:\n        # generate samples from prior\n        if FLAGS.mode == \"predict\":\n            generate(sess, saver, FLAGS.out)\n            return\n\n        sess.run(tf.global_variables_initializer())\n        sess.run(tf.local_variables_initializer())\n        sess.run(tf.tables_initializer())\n\n        # Counts trainable parameters\n        total_parameters = 0\n        for variable in 
tf.trainable_variables():\n            shape = variable.get_shape() # shape is an array of tf.Dimension\n            variable_parameters = 1\n            for dim in shape:\n                variable_parameters *= dim.value\n            total_parameters += variable_parameters\n        print(\"%d total parameters\" % total_parameters)\n\n        best_nll = best_ppl = 0.\n\n        for epoch in range(config.num_epochs):\n            _, _ = _run_epoch(sess, epoch, 'train', display=200)\n            val_nll, _ = _run_epoch(sess, epoch, 'valid')\n            test_nll, test_ppl = _run_epoch(sess, epoch, 'test')\n\n            if val_nll < opt_vars['best_valid_nll']:\n                opt_vars['best_valid_nll'] = val_nll\n                opt_vars['steps_not_improved'] = 0\n                best_nll = test_nll\n                best_ppl = test_ppl\n                saver.save(sess, save_path)\n            else:\n                opt_vars['steps_not_improved'] += 1\n                if opt_vars['steps_not_improved'] == decay_ts:\n                    old_lr = opt_vars['learning_rate']\n                    opt_vars['learning_rate'] *= decay_factor\n                    opt_vars['steps_not_improved'] = 0\n                    new_lr = opt_vars['learning_rate']\n\n                    print('-----\\nchange lr, old lr: %f, new lr: %f\\n-----' %\n                          (old_lr, new_lr))\n\n                    saver.restore(sess, save_path)\n\n                    decay_cnt += 1\n                    if decay_cnt == max_decay:\n                        break\n\n        print('\\nbest testing nll: %.4f, best testing ppl %.4f\\n' %\n              (best_nll, best_ppl))\n\n\nif __name__ == '__main__':\n    tf.app.run(main=_main)\n"
  },
  {
    "path": "texar_repo/requirements.txt",
    "content": "tensorflow >= 1.7.0\ntensorflow-gpu >= 1.7.0\ntensorflow-probability >= 0.3.0\ntensorflow-probability-gpu >= 0.3.0\nfuncsigs >= 1.0.2\n"
  },
  {
    "path": "texar_repo/setup.py",
    "content": "import setuptools\n\n\nlong_description = '''\nTexar is an open-source toolkit based on Tensorflow,\naiming to support a broad set of machine learning especially text generation tasks,\nsuch as machine translation, dialog, summarization, content manipulation, language modeling, and so on.\n\nTexar is designed for both researchers and practitioners for fast prototyping and experimentation.\n'''\n\nsetuptools.setup(\n    name=\"texar\",\n    version=\"0.1\",\n    url=\"https://github.com/asyml/texar\",\n\n    description=\"Toolkit for Text Generation and Beyond\",\n    long_description=long_description,\n    license='Apache License Version 2.0',\n\n    packages=setuptools.find_packages(),\n    platforms='any',\n\n    install_requires=[\n        'numpy',\n        'pyyaml',\n        'requests',\n        'funcsigs',\n    ],\n    extras_require={\n        'tensorflow-cpu': ['tensorflow>=1.7.0', 'tensorflow-probability >= 0.3.0'],\n        'tensorflow-gpu': ['tensorflow-gpu>=1.7.0', 'tensorflow-probability-gpu >= 0.3.0']\n    },\n    package_data={\n        \"texar\": [\n            \"../bin/utils/multi-bleu.perl\",\n        ]\n    },\n    classifiers=[\n        'Intended Audience :: Developers',\n        'Intended Audience :: Education',\n        'Intended Audience :: Science/Research',\n        'Operating System :: OS Independent',\n        'Programming Language :: Python',\n        'Programming Language :: Python :: 2.7',\n        'Programming Language :: Python :: 3.5',\n        'Programming Language :: Python :: 3.6',\n    ],\n)\n"
  },
  {
    "path": "texar_repo/texar/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.module_base import *\nfrom texar.hyperparams import *\nfrom texar.context import *\nfrom texar import modules\nfrom texar import core\nfrom texar import losses\nfrom texar import models\nfrom texar import data\nfrom texar import evals\nfrom texar import agents\nfrom texar import run\nfrom texar import utils\n"
  },
  {
    "path": "texar_repo/texar/agents/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious RL Agents\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.agents.pg_agent import *\nfrom texar.agents.seq_pg_agent import *\nfrom texar.agents.dqn_agent import *\nfrom texar.agents.ac_agent import *\nfrom texar.agents.agent_utils import *\ntry:\n    from texar.agents.agent_gym_utils import *\nexcept ImportError:\n    pass\n"
  },
  {
    "path": "texar_repo/texar/agents/ac_agent.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Actor-critic agent.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nimport numpy as np\n\nfrom texar.agents.episodic_agent_base import EpisodicAgentBase\nfrom texar.utils import utils\n\n# pylint: disable=too-many-instance-attributes, protected-access\n# pylint: disable=too-many-arguments\n\n__all__ = [\n    \"ActorCriticAgent\"\n]\n\n\nclass ActorCriticAgent(EpisodicAgentBase):\n    \"\"\"Actor-critic agent for episodic setting.\n\n    An actor-critic algorithm consists of several components:\n\n        - **Actor** is the policy to optimize. As a temporary implementation,\\\n        here by default we use a :class:`~texar.agents.PGAgent` instance \\\n        that wraps a `policy net` and provides proper interfaces to perform \\\n        the role of an actor.\n        - **Critic** that provides learning signals to the actor. Again, as \\\n        a temporary implemetation, here by default we use a \\\n        :class:`~texar.agents.DQNAgent` instance that wraps a `Q net` and \\\n        provides proper interfaces to perform the role of a critic.\n\n    Args:\n        env_config: An instance of :class:`~texar.agents.EnvConfig` specifying\n            action space, observation space, and reward range, etc. 
Use\n            :func:`~texar.agents.get_gym_env_config` to create an EnvConfig\n            from a gym environment.\n        sess (optional): A tf session.\n            Can be `None` here and set later with `agent.sess = session`.\n        actor (optional): An instance of :class:`~texar.agents.PGAgent` that\n            performs as actor in the algorithm.\n            If not provided, an actor is created based on :attr:`hparams`.\n        actor_kwargs (dict, optional): Keyword arguments for actor\n            constructor. Note that the `hparams` argument for actor\n            constructor is specified in the \"actor_hparams\" field of\n            :attr:`hparams` and should not be included in `actor_kwargs`.\n            Ignored if :attr:`actor` is given.\n        critic (optional): An instance of :class:`~texar.agents.DQNAgent` that\n            performs as critic in the algorithm.\n            If not provided, a critic is created based on :attr:`hparams`.\n        critic_kwargs (dict, optional): Keyword arguments for critic\n            constructor. Note that the `hparams` argument for critic\n            constructor is specified in the \"critic_hparams\" field of\n            :attr:`hparams` and should not be included in `critic_kwargs`.\n            Ignored if :attr:`critic` is given.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerters will be set to default values. 
See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n    \"\"\"\n\n    def __init__(self,\n                 env_config,\n                 sess=None,\n                 actor=None,\n                 actor_kwargs=None,\n                 critic=None,\n                 critic_kwargs=None,\n                 hparams=None):\n        EpisodicAgentBase.__init__(self, env_config=env_config, hparams=hparams)\n\n        self._sess = sess\n        self._num_actions = self._env_config.action_space.high - \\\n                            self._env_config.action_space.low\n\n        with tf.variable_scope(self.variable_scope):\n            if actor is None:\n                kwargs = utils.get_instance_kwargs(\n                    actor_kwargs, self._hparams.actor_hparams)\n                kwargs.update(dict(env_config=env_config, sess=sess))\n                actor = utils.get_instance(\n                    class_or_name=self._hparams.actor_type,\n                    kwargs=kwargs,\n                    module_paths=['texar.agents', 'texar.custom'])\n            self._actor = actor\n\n            if critic is None:\n                kwargs = utils.get_instance_kwargs(\n                    critic_kwargs, self._hparams.critic_hparams)\n                kwargs.update(dict(env_config=env_config, sess=sess))\n                critic = utils.get_instance(\n                    class_or_name=self._hparams.critic_type,\n                    kwargs=kwargs,\n                    module_paths=['texar.agents', 'texar.custom'])\n            self._critic = critic\n\n            if self._actor._discount_factor != self._critic._discount_factor:\n                raise ValueError('discount_factor of the actor and the critic '\n                                 'must be the same.')\n            self._discount_factor = self._actor._discount_factor\n\n            self._observs = []\n            self._actions = []\n            self._rewards = []\n\n    
@staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values:\n\n        .. role:: python(code)\n           :language: python\n\n        .. code-block:: python\n\n            {\n                'actor_type': 'PGAgent',\n                'actor_hparams': None,\n                'critic_type': 'DQNAgent',\n                'critic_hparams': None,\n                'name': 'actor_critic_agent'\n            }\n\n        Here:\n\n        \"actor_type\" : str or class or instance\n            Actor. Can be class, its\n            name or module path, or a class instance. If class name is given,\n            the class must be from module :mod:`texar.agents` or\n            :mod:`texar.custom`. Ignored if a `actor` is given to\n            the agent constructor.\n\n        \"actor_kwargs\" : dict, optional\n            Hyperparameters for the actor class. With the :attr:`actor_kwargs`\n            argument to the constructor, an actor is created with\n            :python:`actor_class(**actor_kwargs, hparams=actor_hparams)`.\n\n        \"critic_type\" : str or class or instance\n            Critic. Can be class, its\n            name or module path, or a class instance. If class name is given,\n            the class must be from module :mod:`texar.agents` or\n            :mod:`texar.custom`. Ignored if a `critic` is given to\n            the agent constructor.\n\n        \"critic_kwargs\" : dict, optional\n            Hyperparameters for the critic class. 
With the :attr:`critic_kwargs`\n            argument to the constructor, an critic is created with\n            :python:`critic_class(**critic_kwargs, hparams=critic_hparams)`.\n\n        \"name\" : str\n            Name of the agent.\n        \"\"\"\n        return {\n            'actor_type': 'PGAgent',\n            'actor_hparams': None,\n            'critic_type': 'DQNAgent',\n            'critic_hparams': None,\n            'name': 'actor_critic_agent'\n        }\n\n    def _reset(self):\n        self._actor._reset()\n        self._critic._reset()\n\n    def _observe(self, reward, terminal, train_policy, feed_dict):\n        self._train_actor(\n            observ=self._observ,\n            action=self._action,\n            feed_dict=feed_dict)\n        self._critic._observe(reward, terminal, train_policy, feed_dict)\n\n    def _train_actor(self, observ, action, feed_dict):\n        qvalues = self._critic._qvalues_from_target(observ=observ)\n        advantage = qvalues[0][action] - np.mean(qvalues)\n        # TODO (bowen): should be a funciton to customize?\n\n        feed_dict_ = {\n            self._actor._observ_inputs: [observ],\n            self._actor._action_inputs: [action],\n            self._actor._advantage_inputs: [advantage]\n        }\n        feed_dict_.update(feed_dict)\n\n        self._actor._train_policy(feed_dict=feed_dict_)\n\n    def get_action(self, observ, feed_dict=None):\n        self._observ = observ\n        self._action = self._actor.get_action(observ, feed_dict=feed_dict)\n\n        self._critic._update_observ_action(self._observ, self._action)\n\n        return self._action\n\n    @property\n    def sess(self):\n        \"\"\"The tf session.\n        \"\"\"\n        return self._sess\n\n    @sess.setter\n    def sess(self, session):\n        self._sess = session\n        self._actor._sess = session\n        self._critic._sess = session\n"
  },
  {
    "path": "texar_repo/texar/agents/agent_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase class for reinforcement learning agents.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom texar.hyperparams import HParams\nfrom texar.utils.variables import get_unique_named_variable_scope\n\n# pylint: disable=too-many-instance-attributes\n\n__all__ = [\n    \"AgentBase\"\n]\n\nclass AgentBase(object):\n    \"\"\"\n    Base class inherited by RL agents.\n\n    Args:\n        TODO\n    \"\"\"\n    def __init__(self, hparams=None):\n        self._hparams = HParams(hparams, self.default_hparams())\n\n        name = self._hparams.name\n        self._variable_scope = get_unique_named_variable_scope(name)\n        self._unique_name = self._variable_scope.name.split(\"/\")[-1]\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        TODO\n        \"\"\"\n        return {\n            'name': 'agent'\n        }\n\n    @property\n    def variable_scope(self):\n        \"\"\"The variable scope of the agent.\n        \"\"\"\n        return self._variable_scope\n\n    @property\n    def name(self):\n        \"\"\"The name of the module (not uniquified).\n        \"\"\"\n        return self._unique_name\n\n    @property\n    def hparams(self):\n        \"\"\"A 
:class:`~texar.hyperparams.HParams` instance. The hyperparameters\n        of the module.\n        \"\"\"\n        return self._hparams\n"
  },
  {
    "path": "texar_repo/texar/agents/agent_gym_utils.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious agent utilities based on OpenAI Gym.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport gym\n\n__all__ = [\n    \"convert_gym_space\",\n    \"get_gym_env_config\"\n]\n\ndef convert_gym_space(spc):\n    \"\"\"Converts a :gym:`gym.Space <#spaces>` instance to a\n    :class:`~texar.agents.Space` instance.\n\n    Args:\n        spc: An instance of `gym.Space` or\n            :class:`~texar.agents.Space`.\n    \"\"\"\n    from texar.agents.agent_utils import Space\n    if isinstance(spc, Space):\n        return spc\n    if isinstance(spc, gym.spaces.Discrete):\n        return Space(shape=(), low=0, high=spc.n, dtype=spc.dtype)\n    elif isinstance(spc, gym.spaces.Box):\n        return Space(\n            shape=spc.shape, low=spc.low, high=spc.high, dtype=spc.dtype)\n\ndef get_gym_env_config(env):\n    \"\"\"Creates an instance of :class:`~texar.agents.EnvConfig`\n    from a :gym:`gym env <#environments>`.\n\n    Args:\n        env: An instance of OpenAI gym Environment.\n\n    Returns:\n        An instance of :class:`~texar.agents.EnvConfig`.\n    \"\"\"\n    from texar.agents.agent_utils import EnvConfig\n    return EnvConfig(\n        action_space=env.action_space,\n        observ_space=env.observation_space,\n        
reward_range=env.reward_range)\n\n"
  },
  {
    "path": "texar_repo/texar/agents/agent_utils.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious agent utilities.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=too-many-arguments, too-few-public-methods, no-member\n# pylint: disable=invalid-name, wrong-import-position\n\nimport numpy as np\n\ngym_utils = None\ntry:\n    from texar.agents import agent_gym_utils as gym_utils\nexcept ImportError:\n    pass\n\n__all__ = [\n    \"Space\",\n    \"EnvConfig\"\n]\n\nclass Space(object):\n    \"\"\"Observation and action spaces. Describes valid actions and observations.\n    Similar to :gym:`gym.Space <#spaces>`.\n\n    Args:\n        shape (optional): Shape of the space, a tuple. If not\n            given, infers from :attr:`low` and :attr:`high`.\n        low (optional): Lower bound (inclusive) of each dimension of the\n            space. Must have\n            shape as specified by :attr:`shape`, and of the same shape with\n            with :attr:`high` (if given). If `None`, set to `-inf` for each\n            dimension.\n        high (optional): Upper bound (inclusive) of each dimension of the\n            space. Must have\n            shape as specified by :attr:`shape`, and of the same shape with\n            with :attr:`low` (if given). 
If `None`, set to `inf` for each\n            dimension.\n        dtype (optional): Data type of elements in the space. If not given,\n            infers from :attr:`low` (if given) or set to `float`.\n\n    Example:\n\n        .. code-block:: python\n\n            s = Space(low=0, high=10, dtype=np.int32)\n            #s.contains(2) == True\n            #s.contains(10) == True\n            #s.contains(11) == False\n            #s.shape == ()\n\n            s2 = Space(shape=(2,2), high=np.ones([2,2]), dtype=np.float)\n            #s2.low == [[-inf, -inf], [-inf, -inf]]\n            #s2.high == [[1., 1.], [1., 1.]]\n    \"\"\"\n    def __init__(self, shape=None, low=None, high=None, dtype=None):\n        if low is None:\n            low = -float('inf')\n        if high is None:\n            high = float('inf')\n\n        if shape is None:\n            low = np.asarray(low)\n            high = np.asarray(high)\n            if low.shape != high.shape:\n                raise ValueError('`low` and `high` must have the same shape.')\n            shape = low.shape\n        else:\n            shape = tuple(shape)\n\n        if np.isscalar(low):\n            low = low + np.zeros(shape, dtype=dtype)\n        if np.isscalar(high):\n            high = high + np.zeros(shape, dtype=dtype)\n        if shape != low.shape or shape != high.shape:\n            raise ValueError(\n                'Shape inconsistent: shape={}, low.shape={}, high.shape={}'\n                .format(shape, low.shape, high.shape))\n        if dtype is None:\n            dtype = low.dtype\n        dtype = np.dtype(dtype)\n        low = low.astype(dtype)\n        high = high.astype(dtype)\n        self._shape = shape\n        self._low = low\n        self._high = high\n        self._dtype = dtype\n\n    def contains(self, x):\n        \"\"\"Checks if x is contained in the space. 
Returns a `bool`.\n        \"\"\"\n        x = np.asarray(x)\n        dtype_match = True\n        if self._dtype.kind in np.typecodes['AllInteger']:\n            if x.dtype.kind not in np.typecodes['AllInteger']:\n                dtype_match = False\n        shape_match = x.shape == self._shape\n        low_match = (x >= self._low).all()\n        high_match = (x <= self._high).all()\n        return dtype_match and shape_match and low_match and high_match\n\n    @property\n    def shape(self):\n        \"\"\"Shape of the space.\n        \"\"\"\n        return self._shape\n\n    @property\n    def low(self):\n        \"\"\"Lower bound of the space.\n        \"\"\"\n        return self._low\n\n    @property\n    def high(self):\n        \"\"\"Upper bound of the space.\n        \"\"\"\n        return self._high\n\n    @property\n    def dtype(self):\n        \"\"\"Data type of the element.\n        \"\"\"\n        return self._dtype\n\nclass EnvConfig(object):\n    \"\"\"Configurations of an environment.\n\n    Args:\n        action_space: An instance of :class:`~texar.agents.Space` or\n            :gym:`gym.Space <#spaces>`, the action space.\n        observ_space: An instance of :class:`~texar.agents.Space` or\n            :gym:`gym.Space <#spaces>`, the observation space.\n        reward_range: A tuple corresponding to the min and max possible\n            rewards, e.g., `reward_range=(-1.0, 1.0)`.\n    \"\"\"\n\n    def __init__(self,\n                 action_space,\n                 observ_space,\n                 reward_range):\n        if gym_utils:\n            action_space = gym_utils.convert_gym_space(action_space)\n            observ_space = gym_utils.convert_gym_space(observ_space)\n\n        self.action_space = action_space\n        self.action_dtype = action_space.dtype\n        self.action_shape = action_space.shape\n\n        self.observ_space = observ_space\n        self.observ_dtype = observ_space.dtype\n        self.observ_shape = 
observ_space.shape\n\n        self.reward_range = reward_range\n"
  },
  {
    "path": "texar_repo/texar/agents/agent_utils_test.py",
    "content": "#\n\"\"\"\nUnit tests for agent utilities.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\n# pylint: disable=no-member, invalid-name, too-many-arguments\n\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.agents.agent_utils import Space\n\nclass SpaceTest(tf.test.TestCase):\n    \"\"\"Tests the Space class.\n    \"\"\"\n\n    def _test_space(self, s, shape, low, high, dtype):\n        self.assertEqual(s.shape, shape)\n        self.assertEqual(s.low, low)\n        self.assertEqual(s.high, high)\n        self.assertEqual(s.dtype, dtype)\n\n    def test_space(self):\n        \"\"\"Tests descrete space.\n        \"\"\"\n        s = Space(shape=(), low=0, high=10, dtype=np.int32)\n        self._test_space(s, (), 0, 10, np.dtype(np.int32))\n        self.assertTrue(s.contains(5))\n        self.assertFalse(s.contains(5.))\n        self.assertFalse(s.contains(15))\n\n        s = Space(low=0, high=10, dtype=np.int32)\n        self._test_space(s, (), 0, 10, np.dtype(np.int32))\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/agents/dqn_agent.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Deep Q learning Agent.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport random\nimport numpy as np\n\nimport tensorflow as tf\n\nimport texar as tx\nfrom texar.agents.episodic_agent_base import EpisodicAgentBase\nfrom texar.utils import utils\nfrom texar.core import optimization as opt\n\n# pylint: disable=too-many-instance-attributes, too-many-arguments\n# pylint: disable=invalid-name\n\n__all__ = [\n    \"DQNAgent\"\n]\n\n\nclass DQNAgent(EpisodicAgentBase):\n    \"\"\"Deep Q learning agent for episodic setting.\n\n    A Q learning algorithm consists of several components:\n\n        - A **Q-net** takes in a state and returns Q-value for action sampling.\\\n        See :class:`~texar.modules.CategoricalQNet` for an example Q-net class\\\n        and required interface.\n        - A **replay memory** manages past experience for Q-net updates. See\\\n        :class:`~texar.core.DequeReplayMemory` for an example replay memory\\\n        class and required interface.\n        - An **exploration** that specifies the exploration strategy used\\\n        to train the Q-net. 
See\\\n        :class:`~texar.core.EpsilonLinearDecayExploration` for an example\\\n        class and required interface.\n\n    Args:\n        env_config: An instance of :class:`~texar.agents.EnvConfig` specifying\n            action space, observation space, and reward range, etc. Use\n            :func:`~texar.agents.get_gym_env_config` to create an EnvConfig\n            from a gym environment.\n        sess (optional): A tf session.\n            Can be `None` here and set later with `agent.sess = session`.\n        qnet (optional): A Q network that predicts Q values given states.\n            If not given, a Q network is created based on :attr:`hparams`.\n        target (optional): A target network to compute target Q values.\n        qnet_kwargs (dict, optional): Keyword arguments for qnet\n            constructor. Note that the `hparams` argument for network\n            constructor is specified in the \"policy_hparams\" field of\n            :attr:`hparams` and should not be included in `policy_kwargs`.\n            Ignored if :attr:`qnet` is given.\n        qnet_caller_kwargs (dict, optional): Keyword arguments for\n            calling `qnet` to get Q values. The `qnet` is called with\n            :python:`outputs=qnet(inputs=observation, **qnet_caller_kwargs)`\n        replay_memory (optional): A replay memory instance.\n            If not given, a replay memory is created based on :attr:`hparams`.\n        replay_memory_kwargs (dict, optional): Keyword arguments for\n            replay_memory constructor.\n            Ignored if :attr:`replay_memory` is given.\n        exploration (optional): An exploration instance used in the algorithm.\n            If not given, an exploration instance is created based on\n            :attr:`hparams`.\n        exploration_kwargs (dict, optional): Keyword arguments for exploration\n            class constructor. Ignored if :attr:`exploration` is given.\n        hparams (dict or HParams, optional): Hyperparameters. 
Missing\n            hyperparamerters will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n    \"\"\"\n    def __init__(self,\n                 env_config,\n                 sess=None,\n                 qnet=None,\n                 target=None,\n                 qnet_kwargs=None,\n                 qnet_caller_kwargs=None,\n                 replay_memory=None,\n                 replay_memory_kwargs=None,\n                 exploration=None,\n                 exploration_kwargs=None,\n                 hparams=None):\n        EpisodicAgentBase.__init__(self, env_config, hparams)\n\n        self._sess = sess\n        self._cold_start_steps = self._hparams.cold_start_steps\n        self._sample_batch_size = self._hparams.sample_batch_size\n        self._update_period = self._hparams.update_period\n        self._discount_factor = self._hparams.discount_factor\n        self._target_update_strategy = self._hparams.target_update_strategy\n        self._num_actions = self._env_config.action_space.high - \\\n                            self._env_config.action_space.low\n\n        with tf.variable_scope(self.variable_scope):\n            if qnet is None:\n                kwargs = utils.get_instance_kwargs(\n                    qnet_kwargs, self._hparams.qnet_hparams)\n                qnet = utils.check_or_get_instance(\n                    ins_or_class_or_name=self._hparams.qnet_type,\n                    kwargs=kwargs,\n                    module_paths=['texar.modules', 'texar.custom'])\n                target = utils.check_or_get_instance(\n                    ins_or_class_or_name=self._hparams.qnet_type,\n                    kwargs=kwargs,\n                    module_paths=['texar.modules', 'texar.custom'])\n            self._qnet = qnet\n            self._target = target\n            self._qnet_caller_kwargs = qnet_caller_kwargs or {}\n\n            if replay_memory is None:\n              
  kwargs = utils.get_instance_kwargs(\n                    replay_memory_kwargs, self._hparams.replay_memory_hparams)\n                replay_memory = utils.check_or_get_instance(\n                    ins_or_class_or_name=self._hparams.replay_memory_type,\n                    kwargs=kwargs,\n                    module_paths=['texar.core', 'texar.custom'])\n            self._replay_memory = replay_memory\n\n            if exploration is None:\n                kwargs = utils.get_instance_kwargs(\n                    exploration_kwargs, self._hparams.exploration_hparams)\n                exploration = utils.check_or_get_instance(\n                    ins_or_class_or_name=self._hparams.exploration_type,\n                    kwargs=kwargs,\n                    module_paths=['texar.core', 'texar.custom'])\n            self._exploration = exploration\n\n        self._build_graph()\n\n        self._observ = None\n        self._action = None\n        self._timestep = 0\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values:\n\n        .. role:: python(code)\n           :language: python\n\n        .. code-block:: python\n\n            {\n                'qnet_type': 'CategoricalQNet',\n                'qnet_hparams': None,\n                'replay_memory_type': 'DequeReplayMemory',\n                'replay_memory_hparams': None,\n                'exploration_type': 'EpsilonLinearDecayExploration',\n                'exploration_hparams': None,\n                'optimization': opt.default_optimization_hparams(),\n                'target_update_strategy': 'copy',\n                'cold_start_steps': 100,\n                'sample_batch_size': 32,\n                'update_period': 100,\n                'discount_factor': 0.95,\n                'name': 'dqn_agent'\n            }\n\n        Here:\n\n        \"qnet_type\" : str or class or instance\n            Q-value net. 
Can be class, its\n            name or module path, or a class instance. If class name is given,\n            the class must be from module :mod:`texar.modules` or\n            :mod:`texar.custom`. Ignored if a `qnet` is given to\n            the agent constructor.\n\n        \"qnet_hparams\" : dict, optional\n            Hyperparameters for the Q net. With the :attr:`qnet_kwargs`\n            argument to the constructor, a network is created with\n            :python:`qnet_class(**qnet_kwargs, hparams=qnet_hparams)`.\n\n        \"replay_memory_type\" : str or class or instance\n            Replay memory class. Can be class, its name or module path,\n            or a class instance.\n            If class name is given, the class must be from module\n            :mod:`texar.core` or :mod:`texar.custom`.\n            Ignored if a `replay_memory` is given to the agent constructor.\n\n        \"replay_memory_hparams\" : dict, optional\n            Hyperparameters for the replay memory. With the\n            :attr:`replay_memory_kwargs` argument to the constructor,\n            a network is created with\n            :python:`replay_memory_class(\n            **replay_memory_kwargs, hparams=replay_memory_hparams)`.\n\n        \"exploration_type\" : str or class or instance\n            Exploration class. Can be class,\n            its name or module path, or a class instance. If class name is\n            given, the class must be from module :mod:`texar.core` or\n            :mod:`texar.custom`. 
Ignored if a `exploration` is given to\n            the agent constructor.\n\n        \"exploration_hparams\" : dict, optional\n            Hyperparameters for the exploration class.\n            With the :attr:`exploration_kwargs` argument to the constructor,\n            a network is created with :python:`exploration_class(\n            **exploration_kwargs, hparams=exploration_hparams)`.\n\n        \"optimization\" : dict\n            Hyperparameters of optimization for updating the Q-net.\n            See :func:`~texar.core.default_optimization_hparams` for details.\n\n        \"cold_start_steps\": int\n            In the beginning, Q-net is not trained in the first few steps.\n\n        \"sample_batch_size\": int\n            The number of samples taken in replay memory when training.\n\n        \"target_update_strategy\": string\n\n            - If **\"copy\"**, the target network is assigned with the parameter \\\n            of Q-net every :attr:`\"update_period\"` steps.\n\n            - If **\"tau\"**, target will be updated by assigning as\n            ``` (1 - 1/update_period) * target + 1/update_period * qnet ```\n\n        \"update_period\": int\n            Frequecy of updating the target network, i.e., updating\n            the target once for every \"update_period\" steps.\n\n        \"discount_factor\" : float\n            The discount factor of reward.\n\n        \"name\" : str\n            Name of the agent.\n        \"\"\"\n        return {\n            'qnet_type': 'CategoricalQNet',\n            'qnet_hparams': None,\n            'replay_memory_type': 'DequeReplayMemory',\n            'replay_memory_hparams': None,\n            'exploration_type': 'EpsilonLinearDecayExploration',\n            'exploration_hparams': None,\n            'optimization': opt.default_optimization_hparams(),\n            'target_update_strategy': 'copy',\n            'cold_start_steps': 100,\n            'sample_batch_size': 32,\n            'update_period': 100,\n  
          'discount_factor': 0.95,\n            'name': 'dqn_agent'\n        }\n\n    def _build_graph(self):\n        with tf.variable_scope(self.variable_scope):\n            self._observ_inputs = tf.placeholder(\n                dtype=self._env_config.observ_dtype,\n                shape=[None, ] + list(self._env_config.observ_shape),\n                name='observ_inputs')\n            self._action_inputs = tf.placeholder(\n                dtype=self._env_config.action_dtype,\n                shape=[None, self._num_actions],\n                name='action_inputs')\n            self._y_inputs = tf.placeholder(\n                dtype=tf.float32,\n                shape=[None, ],\n                name='y_inputs')\n\n            self._qnet_outputs = self._get_qnet_outputs(self._observ_inputs)\n            self._target_outputs = self._get_target_outputs(self._observ_inputs)\n            self._td_error = self._get_td_error(\n                qnet_qvalues=self._qnet_outputs['qvalues'],\n                actions=self._action_inputs,\n                y=self._y_inputs)\n            self._train_op = self._get_train_op()\n\n            if self._target_update_strategy == 'copy':\n                self._update_op = self._get_copy_update_op()\n            elif self._target_update_strategy == 'tau':\n                self._update_op = self._get_tau_update_op()\n\n    def _get_qnet_outputs(self, state_inputs):\n        return self._qnet(inputs=state_inputs, **self._qnet_caller_kwargs)\n\n    def _get_target_outputs(self, state_inputs):\n        return self._target(inputs=state_inputs, **self._qnet_caller_kwargs)\n\n    def _get_td_error(self, qnet_qvalues, actions, y):\n        return y - tf.reduce_sum(qnet_qvalues * tf.to_float(actions), axis=1)\n\n    def _get_train_op(self):\n        train_op = opt.get_train_op(\n            loss=tf.reduce_sum(self._td_error ** 2),\n            variables=self._qnet.trainable_variables,\n            hparams=self._hparams.optimization.todict())\n     
   return train_op\n\n    def _get_copy_update_op(self):\n        op = []\n        for i in range(len(self._qnet.trainable_variables)):\n            op.append(tf.assign(ref=self._target.trainable_variables[i],\n                                value=self._qnet.trainable_variables[i]))\n        return op\n\n    def _get_tau_update_op(self):\n        tau = 1. / self._update_period\n        op = []\n        for i in range(len(self._qnet.trainable_variables)):\n            value_ = (1. - tau) * self._target.trainable_variables[i] + \\\n                    tau * self._qnet.trainable_variables[i]\n            op.append(tf.assign(\n                ref=self._target.trainable_variables[i], value=value_))\n        return op\n\n    def _observe(self, reward, terminal, train_policy, feed_dict):\n        if self._timestep > self._cold_start_steps and train_policy:\n            self._train_qnet(feed_dict)\n\n        action_one_hot = [0.] * self._num_actions\n        action_one_hot[self._action] = 1.\n\n        self._replay_memory.add(dict(\n            observ=self._observ,\n            action=action_one_hot,\n            reward=reward,\n            terminal=terminal,\n            next_observ=None))\n        self._timestep += 1\n\n    def _train_qnet(self, feed_dict):\n        minibatch = self._replay_memory.get(self._sample_batch_size)\n        observ_batch = np.array([data['observ'] for data in minibatch])\n        action_batch = np.array([data['action'] for data in minibatch])\n        reward_batch = np.array([data['reward'] for data in minibatch])\n        terminal_batch = np.array([data['terminal'] for data in minibatch])\n        next_observ_batch = \\\n            np.array([data['next_observ'] for data in minibatch])\n\n        target_qvalue = self._sess.run(\n            self._target_outputs['qvalues'], feed_dict={\n                self._observ_inputs: next_observ_batch,\n                tx.global_mode(): tf.estimator.ModeKeys.PREDICT})\n\n        y_batch = reward_batch\n  
      for i in range(self._sample_batch_size):\n            if not terminal_batch[i]:\n                y_batch[i] += self._discount_factor * np.max(target_qvalue[i])\n\n        feed_dict_ = {\n            self._observ_inputs: observ_batch,\n            self._y_inputs: y_batch,\n            self._action_inputs: action_batch\n        }\n        feed_dict_.update(feed_dict or {})\n\n        self._sess.run(self._train_op, feed_dict=feed_dict_)\n\n        self._update_target(feed_dict)\n\n    def _update_target(self, feed_dict):\n        if self._target_update_strategy == 'tau' or (\n                self._target_update_strategy == 'copy' and\n                self._timestep % self._update_period == 0):\n            self._sess.run(self._update_op, feed_dict=feed_dict)\n\n    def _qvalues_from_qnet(self, observ):\n        return self._sess.run(\n            self._qnet_outputs['qvalues'],\n            feed_dict={self._observ_inputs: np.array([observ]),\n                       tx.global_mode(): tf.estimator.ModeKeys.PREDICT})\n\n    def _qvalues_from_target(self, observ):\n        return self._sess.run(\n            self._target_outputs['qvalues'],\n            feed_dict={self._observ_inputs: np.array([observ]),\n                       tx.global_mode(): tf.estimator.ModeKeys.PREDICT})\n\n    def _update_observ_action(self, observ, action):\n        self._observ = observ\n        self._action = action\n        if self._replay_memory.size() > 0:\n            self._replay_memory.last()['next_observ'] = self._observ\n\n    def _get_action(self, observ, feed_dict=None):\n        qvalue = self._qvalues_from_qnet(observ)\n\n        if random.random() < self._exploration.get_epsilon(self._timestep):\n            action = random.randrange(self._num_actions)\n        else:\n            action = np.argmax(qvalue)\n\n        self._update_observ_action(observ, action)\n\n        return action\n\n    def _reset(self):\n        self._observ = None\n        self._action = None\n\n    
@property\n    def sess(self):\n        \"\"\"The tf session.\n        \"\"\"\n        return self._sess\n\n    @sess.setter\n    def sess(self, session):\n        self._sess = session\n"
  },
  {
    "path": "texar_repo/texar/agents/episodic_agent_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase class for episodic reinforcement learning agents.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.agents.agent_base import AgentBase\n\n# pylint: disable=too-many-instance-attributes\n\nclass EpisodicAgentBase(AgentBase):\n    \"\"\"Base class inherited by episodic RL agents.\n\n    An agent is a wrapper of the **training process** that trains a model\n    with RL algorithms. Agent itself does not create new trainable variables.\n\n    An episodic RL agent typically provides 3 interfaces, namely, :meth:`reset`,\n    :meth:`get_action` and :meth:`observe`, and is used as the following\n    example.\n\n    Example:\n\n        .. 
code-block:: python\n\n            env = SomeEnvironment(...)\n            agent = PGAgent(...)\n\n            while True:\n                # Starts one episode\n                agent.reset()\n                observ = env.reset()\n                while True:\n                    action = agent.get_action(observ)\n                    next_observ, reward, terminal = env.step(action)\n                    agent.observe(reward, terminal)\n                    observ = next_observ\n                    if terminal:\n                        break\n\n    Args:\n        env_config: An instance of :class:`~texar.agents.EnvConfig` specifying\n            action space, observation space, and reward range, etc. Use\n            :func:`~texar.agents.get_gym_env_config` to create an EnvConfig\n            from a gym environment.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n    \"\"\"\n    def __init__(self, env_config, hparams=None):\n        AgentBase.__init__(self, hparams)\n\n        self._env_config = env_config\n\n        self._reset_tmplt_fn = tf.make_template(\n            \"{}_reset\".format(self.name), self._reset)\n        self._observe_tmplt_fn = tf.make_template(\n            \"{}_observe\".format(self.name), self._observe)\n        self._get_action_tmplt_fn = tf.make_template(\n            \"{}_get_action\".format(self.name), self._get_action)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                \"name\": \"agent\"\n            }\n        \"\"\"\n        return {\n            'name': 'agent'\n        }\n\n    def reset(self):\n        \"\"\"Resets the states to begin new episode.\n        \"\"\"\n        self._reset_tmplt_fn()\n\n    def _reset(self):\n        raise NotImplementedError\n\n    def observe(self, reward, terminal, train_policy=True, feed_dict=None):\n        \"\"\"Observes experience from environment.\n\n        Args:\n            reward: Reward of the action. The configuration (e.g., shape) of\n                the reward is defined in :attr:`env_config`.\n            terminal (bool): Whether the episode is terminated.\n            train_policy (bool): Wether to update the policy for this step.\n            feed_dict (dict, optional): Any stuffs fed to running the training\n                operator.\n        \"\"\"\n        return self._observe_tmplt_fn(reward, terminal, train_policy, feed_dict)\n\n    def _observe(self, reward, terminal, train_policy, feed_dict):\n        raise NotImplementedError\n\n    def get_action(self, observ, feed_dict=None):\n        \"\"\"Gets action according to observation.\n\n        Args:\n            observ: Observation from the environment.\n\n        Returns:\n            action from the policy.\n        \"\"\"\n        return self._get_action_tmplt_fn(observ, feed_dict)\n\n    def _get_action(self, observ, feed_dict):\n        raise NotImplementedError\n\n    @property\n    def env_config(self):\n        \"\"\"Environment configuration.\n        \"\"\"\n        return self._env_config\n"
  },
  {
    "path": "texar_repo/texar/agents/pg_agent.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Policy Gradient agent.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=too-many-instance-attributes, too-many-arguments\n\nimport tensorflow as tf\n\nfrom texar.agents.episodic_agent_base import EpisodicAgentBase\nfrom texar.utils import utils\nfrom texar.core import optimization as opt\nfrom texar.losses import pg_losses as losses\nfrom texar.losses.rewards import discount_reward\n\n\nclass PGAgent(EpisodicAgentBase):\n    \"\"\"Policy gradient agent for episodic setting. 
This agent here supports\n    **un-batched** training, i.e., each time generates one action, takes one\n    observation, and updates the policy.\n\n    The policy must take in an observation of shape `[1] + observation_shape`,\n    where the first dimension 1 stands for batch dimension, and output a `dict`\n    containing:\n\n    - Key **\"action\"** whose value is a Tensor of shape \\\n    `[1] + action_shape` containing a single action.\n    - One of keys \"log_prob\" or \"dist\":\n\n        - **\"log_prob\"**: A Tensor of shape `[1]`, the log probability of the \\\n        \"action\".\n        - **\"dist\"**: A \\\n        tf_main:`tf.distributions.Distribution <distributions/Distribution>`\\\n        with the `log_prob` interface and \\\n        `log_prob = dist.log_prob(outputs[\"action\"])`.\n\n    .. role:: python(code)\n       :language: python\n\n    Args:\n        env_config: An instance of :class:`~texar.agents.EnvConfig` specifying\n            action space, observation space, and reward range, etc. Use\n            :func:`~texar.agents.get_gym_env_config` to create an EnvConfig\n            from a gym environment.\n        sess (optional): A tf session.\n            Can be `None` here and set later with `agent.sess = session`.\n        policy (optional): A policy net that takes in observation and outputs\n            actions and probabilities.\n            If not given, a policy network is created based on :attr:`hparams`.\n        policy_kwargs (dict, optional): Keyword arguments for policy\n            constructor. Note that the `hparams` argument for network\n            constructor is specified in the \"policy_hparams\" field of\n            :attr:`hparams` and should not be included in `policy_kwargs`.\n            Ignored if :attr:`policy` is given.\n        policy_caller_kwargs (dict, optional): Keyword arguments for\n            calling the policy to get actions. 
The policy is called with\n            :python:`outputs=policy(inputs=observation, **policy_caller_kwargs)`\n        learning_rate (optional): Learning rate for policy optimization. If\n            not given, determine the learning rate from :attr:`hparams`.\n            See :func:`~texar.core.get_train_op` for more details.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n    \"\"\"\n    def __init__(self,\n                 env_config,\n                 sess=None,\n                 policy=None,\n                 policy_kwargs=None,\n                 policy_caller_kwargs=None,\n                 learning_rate=None,\n                 hparams=None):\n        EpisodicAgentBase.__init__(self, env_config, hparams)\n\n        self._sess = sess\n        self._lr = learning_rate\n        self._discount_factor = self._hparams.discount_factor\n\n        with tf.variable_scope(self.variable_scope):\n            if policy is None:\n                kwargs = utils.get_instance_kwargs(\n                    policy_kwargs, self._hparams.policy_hparams)\n                policy = utils.check_or_get_instance(\n                    self._hparams.policy_type,\n                    kwargs,\n                    module_paths=['texar.modules', 'texar.custom'])\n            self._policy = policy\n            self._policy_caller_kwargs = policy_caller_kwargs or {}\n\n        self._observs = []\n        self._actions = []\n        self._rewards = []\n\n        self._train_outputs = None\n\n        self._build_graph()\n\n    def _build_graph(self):\n        with tf.variable_scope(self.variable_scope):\n            self._observ_inputs = tf.placeholder(\n                dtype=self._env_config.observ_dtype,\n                shape=[None, ] + list(self._env_config.observ_shape),\n                
name='observ_inputs')\n            self._action_inputs = tf.placeholder(\n                dtype=self._env_config.action_dtype,\n                shape=[None, ] + list(self._env_config.action_shape),\n                name='action_inputs')\n            self._advantage_inputs = tf.placeholder(\n                dtype=tf.float32,\n                shape=[None, ],\n                name='advantages_inputs')\n\n            self._outputs = self._get_policy_outputs()\n\n            self._pg_loss = self._get_pg_loss()\n\n            self._train_op = self._get_train_op()\n\n    def _get_policy_outputs(self):\n        outputs = self._policy(\n            inputs=self._observ_inputs, **self._policy_caller_kwargs)\n        return outputs\n\n    def _get_pg_loss(self):\n        if 'log_prob' in self._outputs:\n            log_probs = self._outputs['log_prob']\n        elif 'dist' in self._outputs:\n            log_probs = self._outputs['dist'].log_prob(self._action_inputs)\n        else:\n            raise ValueError('Outputs of the policy must have one of '\n                             '\"log_prob\" or \"dist\".')\n        pg_loss = losses.pg_loss_with_log_probs(\n            log_probs=log_probs,\n            advantages=self._advantage_inputs,\n            average_across_timesteps=True,\n            sum_over_timesteps=False)\n        return pg_loss\n\n    def _get_train_op(self):\n        train_op = opt.get_train_op(\n            loss=self._pg_loss,\n            variables=self._policy.trainable_variables,\n            learning_rate=self._lr,\n            hparams=self._hparams.optimization.todict())\n        return train_op\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values:\n\n        .. role:: python(code)\n           :language: python\n\n        .. 
code-block:: python\n\n            {\n                'policy_type': 'CategoricalPolicyNet',\n                'policy_hparams': None,\n                'discount_factor': 0.95,\n                'normalize_reward': False,\n                'optimization': default_optimization_hparams(),\n                'name': 'pg_agent',\n            }\n\n        Here:\n\n        \"policy_type\" : str or class or instance\n            Policy net. Can be class, its name or module path, or a class\n            instance. If class name is given, the class must be from module\n            :mod:`texar.modules` or :mod:`texar.custom`. Ignored if a\n            `policy` is given to the agent constructor.\n\n        \"policy_hparams\" : dict, optional\n            Hyperparameters for the policy net. With the :attr:`policy_kwargs`\n            argument to the constructor, a network is created with\n            :python:`policy_class(**policy_kwargs, hparams=policy_hparams)`.\n\n        \"discount_factor\" : float\n            The discount factor of reward.\n\n        \"normalize_reward\" : bool\n            Whether to normalize the discounted reward, by\n            `(discounted_reward - mean) / std`.\n\n        \"optimization\" : dict\n            Hyperparameters of optimization for updating the policy net.\n            See :func:`~texar.core.default_optimization_hparams` for details.\n\n        \"name\" : str\n            Name of the agent.\n        \"\"\"\n        return {\n            'policy_type': 'CategoricalPolicyNet',\n            'policy_hparams': None,\n            'discount_factor': 0.95,\n            'normalize_reward': False,\n            'optimization': opt.default_optimization_hparams(),\n            'name': 'pg_agent',\n        }\n\n    def _reset(self):\n        self._observs = []\n        self._actions = []\n        self._rewards = []\n\n    def _get_action(self, observ, feed_dict):\n        fetches = {\n            \"action\": self._outputs['action']\n        }\n\n        
feed_dict_ = {self._observ_inputs: [observ, ]}\n        feed_dict_.update(feed_dict or {})\n\n        vals = self._sess.run(fetches, feed_dict=feed_dict_)\n        action = vals['action']\n        action = action[0] # Removes the batch dimension\n\n        self._observs.append(observ)\n        self._actions.append(action)\n\n        return action\n\n    def _observe(self, reward, terminal, train_policy, feed_dict):\n        self._rewards.append(reward)\n\n        if terminal and train_policy:\n            self._train_policy(feed_dict=feed_dict)\n\n    def _train_policy(self, feed_dict=None):\n        \"\"\"Updates the policy.\n\n        Args:\n            TODO\n        \"\"\"\n        qvalues = discount_reward(\n            [self._rewards], discount=self._hparams.discount_factor,\n            normalize=self._hparams.normalize_reward)\n        qvalues = qvalues[0, :]\n\n        fetches = dict(loss=self._train_op)\n        feed_dict_ = {\n            self._observ_inputs: self._observs,\n            self._action_inputs: self._actions,\n            self._advantage_inputs: qvalues}\n        feed_dict_.update(feed_dict or {})\n\n        self._train_outputs = self._sess.run(fetches, feed_dict=feed_dict_)\n\n    @property\n    def sess(self):\n        \"\"\"The tf session.\n        \"\"\"\n        return self._sess\n\n    @sess.setter\n    def sess(self, session):\n        self._sess = session\n\n    @property\n    def policy(self):\n        \"\"\"The policy model.\n        \"\"\"\n        return self._policy\n"
  },
  {
    "path": "texar_repo/texar/agents/seq_agent_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase class for reinforcement learning agents for sequence prediction.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom texar.agents.agent_base import AgentBase\n\n# pylint: disable=too-many-instance-attributes\n\nclass SeqAgentBase(AgentBase):\n    \"\"\"\n    Base class inherited by sequence prediction RL agents.\n\n    Args:\n        TODO\n    \"\"\"\n    def __init__(self, hparams=None):\n        AgentBase.__init__(self, hparams)\n\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        TODO\n        \"\"\"\n        return {\n            'name': 'agent'\n        }\n\n"
  },
  {
    "path": "texar_repo/texar/agents/seq_pg_agent.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Policy Gradient agent for sequence prediction.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=too-many-instance-attributes, too-many-arguments, no-member\n\nimport tensorflow as tf\n\nfrom texar.agents.seq_agent_base import SeqAgentBase\nfrom texar.core import optimization as opt\nfrom texar.losses.pg_losses import pg_loss_with_logits\nfrom texar.losses.rewards import discount_reward\nfrom texar.losses.entropy import sequence_entropy_with_logits\n\n__all__ = [\n    \"SeqPGAgent\"\n]\n\nclass SeqPGAgent(SeqAgentBase):\n    \"\"\"Policy Gradient agent for sequence prediction.\n\n    This is a wrapper of the **training process** that trains a model\n    with policy gradient. 
Agent itself does not create new trainable variables.\n\n    Args:\n        samples: An `int` Tensor of shape `[batch_size, max_time]` containing\n            sampled sequences from the model.\n        logits: A float Tenosr of shape `[batch_size, max_time, vocab_size]`\n            containing the logits of samples from the model.\n        sequence_length: A Tensor of shape `[batch_size]`.\n            Time steps beyond the respective sequence lengths are masked out.\n        trainable_variables (optional): Trainable variables of the model to\n            update during training. If `None`, all trainable variables in the\n            graph are used.\n        learning_rate (optional): Learning rate for policy optimization. If\n            not given, determine the learning rate from :attr:`hparams`.\n            See :func:`~texar.core.get_train_op` for more details.\n        sess (optional): A tf session.\n            Can be `None` here and set later with `agent.sess = session`.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    Example:\n\n        .. 
code-block:: python\n\n            ## Train a decoder with policy gradient\n            decoder = BasicRNNDecoder(...)\n            outputs, _, sequence_length = decoder(\n                decoding_strategy='infer_sample', ...)\n\n            sess = tf.Session()\n            agent = SeqPGAgent(\n                samples=outputs.sample_id,\n                logits=outputs.logits,\n                sequence_length=sequence_length,\n                sess=sess)\n            while training:\n                # Generate samples\n                vals = agent.get_samples()\n                # Evaluate reward\n                sample_text = tx.utils.map_ids_to_strs(vals['samples'], vocab)\n                reward_bleu = []\n                for y, y_ in zip(ground_truth, sample_text)\n                    reward_bleu.append(tx.evals.sentence_bleu(y, y_)\n                # Update\n                agent.observe(reward=reward_bleu)\n    \"\"\"\n    def __init__(self,\n                 samples,\n                 logits,\n                 sequence_length,\n                 trainable_variables=None,\n                 learning_rate=None,\n                 sess=None,\n                 hparams=None):\n        SeqAgentBase.__init__(self, hparams)\n\n        self._lr = learning_rate\n\n        # Tensors\n        self._samples = samples\n        self._logits = logits\n        self._sequence_length = sequence_length\n        self._trainable_variables = trainable_variables\n\n        # Python values\n        self._samples_py = None\n        self._sequence_length_py = None\n        self._rewards = None\n\n        self._sess = sess\n\n        # For session partial run\n        self._partial_run_handle = None\n        self._qvalue_inputs_fed = False\n\n        self._build_graph()\n\n    def _build_graph(self):\n        with tf.variable_scope(self.variable_scope):\n            self._qvalue_inputs = tf.placeholder(\n                dtype=tf.float32,\n                shape=[None, None],\n                
name='qvalue_inputs')\n            self._pg_loss = self._get_pg_loss()\n            self._train_op = self._get_train_op()\n\n    def _get_pg_loss(self):\n        loss_hparams = self._hparams.loss\n        pg_loss = pg_loss_with_logits(\n            actions=self._samples,\n            logits=self._logits,\n            sequence_length=self._sequence_length,\n            advantages=self._qvalue_inputs,\n            batched=True,\n            average_across_batch=loss_hparams.average_across_batch,\n            average_across_timesteps=loss_hparams.average_across_timesteps,\n            sum_over_batch=loss_hparams.sum_over_batch,\n            sum_over_timesteps=loss_hparams.sum_over_timesteps,\n            time_major=loss_hparams.time_major)\n\n        if self._hparams.entropy_weight > 0:\n            entropy = self._get_entropy()\n            pg_loss -= self._hparams.entropy_weight * entropy\n\n        return pg_loss\n\n    def _get_entropy(self):\n        loss_hparams = self._hparams.loss\n        return sequence_entropy_with_logits(\n            self._logits,\n            sequence_length=self._sequence_length,\n            average_across_batch=loss_hparams.average_across_batch,\n            average_across_timesteps=loss_hparams.average_across_timesteps,\n            sum_over_batch=loss_hparams.sum_over_batch,\n            sum_over_timesteps=loss_hparams.sum_over_timesteps,\n            time_major=loss_hparams.time_major)\n\n    def _get_train_op(self):\n        train_op = opt.get_train_op(\n            loss=self._pg_loss,\n            variables=self._trainable_variables,\n            learning_rate=self._lr,\n            hparams=self._hparams.optimization.todict())\n        return train_op\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values:\n\n        .. role:: python(code)\n           :language: python\n\n        .. 
code-block:: python\n\n            {\n                'discount_factor': 0.95,\n                'normalize_reward': False,\n                'entropy_weight': 0.,\n                'loss': {\n                    'average_across_batch': True,\n                    'average_across_timesteps': False,\n                    'sum_over_batch': False,\n                    'sum_over_timesteps': True,\n                    'time_major': False\n                },\n                'optimization': default_optimization_hparams(),\n                'name': 'pg_agent',\n            }\n\n        Here:\n\n        \"discount_factor\" : float\n            The discount factor of reward.\n\n        \"normalize_reward\" : bool\n            Whether to normalize the discounted reward, by\n            `(discounted_reward - mean) / std`. Here `mean` and `std` are\n            over all time steps and all samples in the batch.\n\n        \"entropy_weight\" : float\n            The weight of entropy loss of the sample distribution, to encourage\n            maximizing the Shannon entropy. 
Set to 0 to disable the loss.\n\n        \"loss\" : dict\n            Extra keyword arguments for\n            :func:`~texar.losses.pg_loss_with_logits`, including the\n            reduce arguments (e.g., `average_across_batch`) and `time_major`\n\n        \"optimization\" : dict\n            Hyperparameters of optimization for updating the policy net.\n            See :func:`~texar.core.default_optimization_hparams` for details.\n\n        \"name\" : str\n            Name of the agent.\n        \"\"\"\n        return {\n            'discount_factor': 0.95,\n            'normalize_reward': False,\n            'entropy_weight': 0.,\n            'loss': {\n                'average_across_batch': True,\n                'average_across_timesteps': False,\n                'sum_over_batch': False,\n                'sum_over_timesteps': True,\n                'time_major': False\n            },\n            'optimization': opt.default_optimization_hparams(),\n            'name': 'pg_agent',\n        }\n\n    def _get_partial_run_feeds(self, feeds=None):\n        if feeds is None:\n            feeds = []\n        feeds += [self._qvalue_inputs]\n        return feeds\n\n    def _setup_partial_run(self, fetches=None, feeds=None):\n        fetches_ = [self._samples, self._sequence_length, self._pg_loss,\n                    self._train_op]\n        if fetches is not None:\n            for fet in fetches:\n                if fet not in fetches_:\n                    fetches_.append(fet)\n\n        feeds = self._get_partial_run_feeds(feeds)\n\n        self._partial_run_handle = self._sess.partial_run_setup(\n            fetches_, feeds=feeds)\n\n        self._qvalue_inputs_fed = False\n\n    def _check_extra_fetches(self, extra_fetches):\n        fetch_values = None\n        if extra_fetches is not None:\n            fetch_values = list(extra_fetches.values())\n        if fetch_values is not None:\n            if self._samples in fetch_values:\n                raise 
ValueError(\n                    \"`samples` must not be included in `extra_fetches`. \"\n                    \"It is added automatically.\")\n            if self._sequence_length in fetch_values:\n                raise ValueError(\n                    \"`sequence_length` must not be included in `extra_fetches`.\"\n                    \" It is added automatically.\")\n            if \"samples\" in extra_fetches:\n                raise ValueError(\n                    \"Key 'samples' is preserved and must not be used \"\n                    \"in `extra_fetches`.\")\n            if \"sequence_length\" in extra_fetches:\n                raise ValueError(\n                    \"Key 'sequence_length' is preserved and must not be used \"\n                    \"in `extra_fetches`.\")\n\n    def get_samples(self, extra_fetches=None, feed_dict=None):\n        \"\"\"Returns sequence samples and extra results.\n\n        Args:\n            extra_fetches (dict, optional): Extra tensors to fetch values,\n                besides `samples` and `sequence_length`. Same as the\n                `fetches` argument of\n                :tf_main:`tf.Session.run <Session#run>` and\n                tf_main:`partial_run <Session#partial_run>`.\n            feed_dict (dict, optional): A `dict` that maps tensor to\n                values. Note that all placeholder values used in\n                :meth:`get_samples` and subsequent :meth:`observe` calls\n                should be fed here.\n\n        Returns:\n            A `dict` with keys **\"samples\"** and **\"sequence_length\"**\n            containing the fetched values of :attr:`samples` and\n            :attr:`sequence_length`, as well as other fetched values\n            as specified in :attr:`extra_fetches`.\n\n        Example:\n\n            .. 
code-block:: python\n\n                extra_fetches = {'truth_ids': data_batch['text_ids']}\n                vals = agent.get_samples()\n                sample_text = tx.utils.map_ids_to_strs(vals['samples'], vocab)\n                truth_text = tx.utils.map_ids_to_strs(vals['truth_ids'], vocab)\n                reward = reward_fn_in_python(truth_text, sample_text)\n        \"\"\"\n        if self._sess is None:\n            raise ValueError(\"`sess` must be specified before sampling.\")\n\n        self._check_extra_fetches(extra_fetches)\n\n        # Sets up partial_run\n        fetch_values = None\n        if extra_fetches is not None:\n            fetch_values = list(extra_fetches.values())\n        feeds = None\n        if feed_dict is not None:\n            feeds = list(feed_dict.keys())\n        self._setup_partial_run(fetches=fetch_values, feeds=feeds)\n\n        # Runs the sampling\n        fetches = {\n            \"samples\": self._samples,\n            \"sequence_length\": self._sequence_length\n        }\n        if extra_fetches is not None:\n            fetches.update(extra_fetches)\n\n        feed_dict_ = feed_dict\n\n        vals = self._sess.partial_run(\n            self._partial_run_handle, fetches, feed_dict=feed_dict_)\n\n        self._samples_py = vals['samples']\n        self._sequence_length_py = vals['sequence_length']\n\n        return vals\n\n    def observe(self, reward, train_policy=True, compute_loss=True):\n        \"\"\"Observes the reward, and updates the policy or computes loss\n        accordingly.\n\n        Args:\n            reward: A Python array/list of shape `[batch_size]` containing\n                the reward for the samples generated in last call of\n                :meth:`get_samples`.\n            train_policy (bool): Whether to update the policy model according\n                to the reward.\n            compute_loss (bool): If `train_policy` is False, whether to\n                compute the policy gradient loss (but 
does not update the\n                policy).\n\n        Returns:\n            If `train_policy` or `compute_loss` is True, returns the loss\n            (a python float scalar). Otherwise returns `None`.\n        \"\"\"\n        self._rewards = reward\n\n        if train_policy:\n            return self._train_policy()\n        elif compute_loss:\n            return self._evaluate_pg_loss()\n        else:\n            return None\n\n    def _get_qvalues(self):\n        qvalues = discount_reward(\n            self._rewards,\n            self._sequence_length_py,\n            discount=self._hparams.discount_factor,\n            normalize=self._hparams.normalize_reward)\n        return qvalues\n\n    def _evaluate_pg_loss(self):\n        fetches = {\n            \"loss\": self._pg_loss\n        }\n\n        feed_dict_ = None\n        if not self._qvalue_inputs_fed:\n            qvalues = self._get_qvalues()\n            feed_dict_ = {self._qvalue_inputs: qvalues}\n\n        vals = self._sess.partial_run(\n            self._partial_run_handle, fetches, feed_dict=feed_dict_)\n\n        self._qvalue_inputs_fed = True\n\n        return vals['loss']\n\n    def _train_policy(self):\n        \"\"\"Updates the policy.\n        \"\"\"\n        fetches = {\n            \"loss\": self._train_op,\n        }\n\n        feed_dict_ = None\n        if not self._qvalue_inputs_fed:\n            qvalues = self._get_qvalues()\n            feed_dict_ = {self._qvalue_inputs: qvalues}\n\n        vals = self._sess.partial_run(\n            self._partial_run_handle, fetches, feed_dict=feed_dict_)\n\n        self._qvalue_inputs_fed = True\n\n        return vals['loss']\n\n    @property\n    def sess(self):\n        \"\"\"The tf session.\n        \"\"\"\n        return self._sess\n\n    @sess.setter\n    def sess(self, sess):\n        self._sess = sess\n\n    @property\n    def pg_loss(self):\n        \"\"\"The scalar tensor of policy gradient loss.\n        \"\"\"\n        return 
self._pg_loss\n\n    @property\n    def sequence_length(self):\n        \"\"\"The tensor of sample sequence length, of shape `[batch_size]`.\n        \"\"\"\n        return self._sequence_length\n\n    @property\n    def samples(self):\n        \"\"\"The tensor of sequence samples.\n        \"\"\"\n        return self._samples\n\n    @property\n    def logits(self):\n        \"\"\"The tensor of sequence logits.\n        \"\"\"\n        return self._logits\n"
  },
  {
    "path": "texar_repo/texar/agents/seq_pg_agent_test.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nUnit tests for sequence prediction policy gradient agents.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nfrom texar.modules.decoders.rnn_decoders import BasicRNNDecoder\nfrom texar.agents import SeqPGAgent\nfrom texar import context\n\nclass SeqPGAgentTest(tf.test.TestCase):\n    \"\"\"Tests :class:`texar.agents.SeqPGAgent`\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n        self._vocab_size = 4\n        self._max_time = 8\n        self._batch_size = 16\n        self._emb_dim = 20\n        self._inputs = tf.random_uniform(\n            [self._batch_size, self._max_time, self._emb_dim],\n            maxval=1., dtype=tf.float32)\n        self._embedding = tf.random_uniform(\n            [self._vocab_size, self._emb_dim], maxval=1., dtype=tf.float32)\n\n    def test_seq_pg_agent(self):\n        \"\"\"Tests logits.\n        \"\"\"\n        decoder = BasicRNNDecoder(vocab_size=self._vocab_size)\n        outputs, _, sequence_length = decoder(\n            decoding_strategy=\"infer_greedy\",\n            max_decoding_length=10,\n            embedding=self._embedding,\n            start_tokens=[1]*self._batch_size,\n            end_token=2)\n\n        agent = 
SeqPGAgent(\n            outputs.sample_id, outputs.logits, sequence_length,\n            decoder.trainable_variables)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n\n            agent.sess = sess\n\n            feed_dict = {context.global_mode(): tf.estimator.ModeKeys.TRAIN}\n            for _ in range(2):\n                vals = agent.get_samples(feed_dict=feed_dict)\n                self.assertEqual(vals['samples'].shape[0], self._batch_size)\n\n                loss_1 = agent.observe([1.]*self._batch_size)\n                loss_2 = agent.observe(\n                    [1.]*self._batch_size, train_policy=False)\n                self.assertEqual(loss_1.shape, ())\n                self.assertEqual(loss_2.shape, ())\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/context.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nGlobal context manager that handles train/infer mode, etc\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport tensorflow as tf\n\n__all__ = [\n    \"global_mode\",\n    \"global_mode_train\",\n    \"global_mode_eval\",\n    \"global_mode_predict\",\n    \"valid_modes\"\n]\n\n_GLOBAL_MODE_KEY = \"GLOBAL_MODE\"\n\ndef global_mode():\n    \"\"\"Returns the Tensor of global mode.\n\n    This is a placeholder with default value of\n    :tf_main:`tf.estimator.ModeKeys.TRAIN <estimator/ModeKeys>`.\n\n    Example:\n\n        .. 
code-block:: python\n\n            mode = session.run(global_mode())\n            # mode == tf.estimator.ModeKeys.TRAIN\n\n            mode = session.run(\n                global_mode(),\n                feed_dict={tf.global_mode(): tf.estimator.ModeKeys.PREDICT})\n            # mode == tf.estimator.ModeKeys.PREDICT\n    \"\"\"\n    mode = tf.get_collection_ref(_GLOBAL_MODE_KEY)\n    if len(mode) < 1:\n        #mode_tensor = tf.placeholder(tf.string, name=\"global_mode\")\n        mode_tensor = tf.placeholder_with_default(\n            input=tf.estimator.ModeKeys.TRAIN,\n            shape=(),\n            name=\"global_mode\")\n        #mode_tensor = tf.constant(\n        #    value=tf.estimator.ModeKeys.TRAIN,\n        #    dtype=tf.string,\n        #    name=\"global_mode\")\n        mode.append(mode_tensor)\n    return mode[0]\n\ndef global_mode_train():\n    \"\"\"Returns a bool Tensor indicating whether the global mode is TRAIN.\n\n    Example:\n\n        .. code-block:: python\n\n            is_train = session.run(global_mode_train())\n            # is_train == True\n\n            is_train = session.run(\n                global_mode_train()\n                feed_dict={tf.global_mode(): tf.estimator.ModeKeys.PREDICT})\n            # is_train == False\n    \"\"\"\n    mode = global_mode()\n    return tf.equal(mode, tf.estimator.ModeKeys.TRAIN)\n\ndef global_mode_eval():\n    \"\"\"Returns a bool Tensor indicating whether the global mode is EVAL.\n    \"\"\"\n    mode = global_mode()\n    return tf.equal(mode, tf.estimator.ModeKeys.EVAL)\n\ndef global_mode_predict():\n    \"\"\"Returns a bool Tensor indicating whether the global mode is PREDICT.\n    \"\"\"\n    mode = global_mode()\n    return tf.equal(mode, tf.estimator.ModeKeys.PREDICT)\n\ndef valid_modes():\n    \"\"\"Returns a set of possible values of mode.\n    \"\"\"\n    return {tf.estimator.ModeKeys.TRAIN,\n            tf.estimator.ModeKeys.EVAL,\n            tf.estimator.ModeKeys.PREDICT}\n"
  },
  {
    "path": "texar_repo/texar/context_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for various context functionalities.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nfrom texar import context\n\n# pylint: disable=protected-access\n\nclass ContextTest(tf.test.TestCase):\n    \"\"\"Tests context.\n    \"\"\"\n\n    def test_global_mode(self):\n        \"\"\"Tests the mode context manager.\n        \"\"\"\n        global_mode = context.global_mode()\n        self.assertIsInstance(global_mode, tf.Tensor)\n\n        mode_train = context.global_mode_train()\n        mode_eval = context.global_mode_eval()\n        mode_predict = context.global_mode_predict()\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n\n            global_mode_ = sess.run(global_mode)\n            self.assertEqual(tf.compat.as_str(global_mode_),\n                             tf.estimator.ModeKeys.TRAIN)\n\n            global_mode_, mode_train_, mode_eval_, mode_predict_ = sess.run(\n                [global_mode, mode_train, mode_eval, mode_predict],\n                feed_dict={context.global_mode(): tf.estimator.ModeKeys.TRAIN})\n            self.assertEqual(global_mode_, tf.estimator.ModeKeys.TRAIN)\n            self.assertTrue(mode_train_)\n            self.assertFalse(mode_eval_)\n            self.assertFalse(mode_predict_)\n\n            global_mode_, mode_train_, mode_eval_, mode_predict_ = sess.run(\n                [global_mode, mode_train, mode_eval, mode_predict],\n                feed_dict={context.global_mode(): tf.estimator.ModeKeys.EVAL})\n            self.assertEqual(global_mode_, tf.estimator.ModeKeys.EVAL)\n            self.assertFalse(mode_train_)\n            self.assertTrue(mode_eval_)\n            self.assertFalse(mode_predict_)\n\n            global_mode_, mode_train_, mode_eval_, mode_predict_ = 
sess.run(\n                [global_mode, mode_train, mode_eval, mode_predict],\n                feed_dict={context.global_mode():\n                           tf.estimator.ModeKeys.PREDICT})\n            self.assertEqual(global_mode_, tf.estimator.ModeKeys.PREDICT)\n            self.assertFalse(mode_train_)\n            self.assertFalse(mode_eval_)\n            self.assertTrue(mode_predict_)\n\n        global_mode_values = tf.get_collection_ref(context._GLOBAL_MODE_KEY)\n        self.assertEqual(len(global_mode_values), 1)\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/core/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar core.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.core.layers import *\nfrom texar.core.replay_memories import *\nfrom texar.core.explorations import *\nfrom texar.core.optimization import *\n"
  },
  {
    "path": "texar_repo/texar/core/explorations.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nClasses and utilities for exploration in RL.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom texar.hyperparams import HParams\n\n# pylint: disable=invalid-name\n\n__all__ = [\n    \"ExplorationBase\",\n    \"EpsilonLinearDecayExploration\"\n]\n\nclass ExplorationBase(object):\n    \"\"\"Base class inherited by all exploration classes.\n\n    Args:\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparameters are set to default values. See\n            :meth:`default_hparams` for the defaults.\n    \"\"\"\n    def __init__(self, hparams=None):\n        self._hparams = HParams(hparams, self.default_hparams())\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a `dict` of hyperparameters and their default values.\n\n        .. 
code-block:: python\n\n            {\n                'name': 'exploration_base'\n            }\n        \"\"\"\n        return {\n            'name': 'exploration_base'\n        }\n\n    def get_epsilon(self, timestep):\n        \"\"\"Returns the epsilon value.\n\n        Args:\n            timestep (int): The time step.\n\n        Returns:\n            float: the epsilon value.\n        \"\"\"\n        raise NotImplementedError\n\n    @property\n    def hparams(self):\n        \"\"\"The hyperparameter.\n        \"\"\"\n        return self._hparams\n\n\nclass EpsilonLinearDecayExploration(ExplorationBase):\n    \"\"\"Decays epsilon linearly.\n\n    Args:\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparameters are set to default values. See\n            :meth:`default_hparams` for the defaults.\n    \"\"\"\n    def __init__(self, hparams=None):\n        ExplorationBase.__init__(self, hparams=hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a `dict` of hyperparameters and their default values.\n\n        .. 
code-block:: python\n\n            {\n                'initial_epsilon': 0.1,\n                'final_epsilon': 0.0,\n                'decay_timesteps': 20000,\n                'start_timestep': 0,\n                'name': 'epsilon_linear_decay_exploration',\n            }\n\n        This specifies the decay process that starts at\n        \"start_timestep\" with the value \"initial_epsilon\", and decays for\n        steps \"decay_timesteps\" to reach the final epsilon value\n        \"final_epsilon\".\n        \"\"\"\n        return {\n            'name': 'epsilon_linear_decay_exploration',\n            'initial_epsilon': 0.1,\n            'final_epsilon': 0.0,\n            'decay_timesteps': 20000,\n            'start_timestep': 0\n        }\n\n    def get_epsilon(self, timestep):\n        nsteps = self._hparams.decay_timesteps\n        st = self._hparams.start_timestep\n        et = st + nsteps\n\n        if timestep <= st:\n            return self._hparams.initial_epsilon\n        if timestep > et:\n            return self._hparams.final_epsilon\n        r = (timestep - st) * 1.0 / nsteps\n        epsilon = (1 - r) * self._hparams.initial_epsilon + \\\n                r * self._hparams.final_epsilon\n\n        return epsilon\n\n"
  },
  {
    "path": "texar_repo/texar/core/layers.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious neural network layers\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport copy\n\nimport tensorflow as tf\nimport tensorflow.contrib.rnn as rnn\n\nfrom texar.hyperparams import HParams\nfrom texar.utils import utils\nfrom texar.utils.dtypes import is_str\nfrom texar.utils.variables import add_variable\nfrom texar.utils.mode import is_train_mode, switch_dropout\n\n# pylint: disable=redefined-variable-type, invalid-name\n# pylint: disable=too-many-branches, too-many-arguments, too-many-lines\n# pylint: disable=protected-access\n\n__all__ = [\n    \"default_rnn_cell_hparams\",\n    \"get_rnn_cell\",\n    \"get_rnn_cell_trainable_variables\",\n    \"default_regularizer_hparams\",\n    \"get_regularizer\",\n    \"get_initializer\",\n    \"get_activation_fn\",\n    \"get_constraint_fn\",\n    \"get_layer\",\n    \"_ReducePooling1D\",\n    \"MaxReducePooling1D\",\n    \"AverageReducePooling1D\",\n    \"get_pooling_layer_hparams\",\n    \"MergeLayer\",\n    \"SequentialLayer\",\n    \"default_conv1d_kwargs\",\n    \"default_conv2d_kwargs\",\n    \"default_conv3d_kwargs\",\n    \"default_conv2d_transpose_kwargs\",\n    \"default_conv3d_transpose_kwargs\",\n    \"default_dense_kwargs\",\n    \"default_dropout_kwargs\",\n    \"default_flatten_kwargs\",\n    
\"default_max_pooling1d_kwargs\",\n    \"default_max_pooling2d_kwargs\",\n    \"default_max_pooling3d_kwargs\",\n    \"default_separable_conv2d_kwargs\",\n    \"default_batch_normalization_kwargs\",\n    \"default_average_pooling1d_kwargs\",\n    \"default_average_pooling2d_kwargs\",\n    \"default_average_pooling3d_kwargs\",\n    \"layer_normalize\",\n]\n\ndef default_rnn_cell_hparams():\n    \"\"\"Returns a `dict` of RNN cell hyperparameters and their default values.\n\n    .. role:: python(code)\n       :language: python\n\n    .. code-block:: python\n\n        {\n            \"type\": \"LSTMCell\",\n            \"kwargs\": {\n                \"num_units\": 256\n            },\n            \"num_layers\": 1,\n            \"dropout\": {\n                \"input_keep_prob\": 1.0,\n                \"output_keep_prob\": 1.0,\n                \"state_keep_prob\": 1.0,\n                \"variational_recurrent\": False,\n                \"input_size\": []\n            },\n            \"residual\": False,\n            \"highway\": False,\n        }\n\n    Here:\n\n    \"type\" : str or cell class or cell instance\n        The RNN cell type. This can be\n\n        - The string name or full module path of a cell class. If class \\\n        name is provided, the class must be in module \\\n        :tf_main:`tf.nn.rnn_cell <nn/rnn_cell/LSTMCell>`, \\\n        :tf_main:`tf.contrib.rnn <contrib/rnn>`, or :mod:`texar.custom`.\n        - A cell class.\n        - An instance of a cell class. This is not valid if \\\n        \"num_layers\" > 1.\n\n        For example\n\n        .. code-block:: python\n\n            \"type\": \"LSTMCell\" # class name\n            \"type\": \"tensorflow.contrib.rnn.Conv1DLSTMCell\" # module path\n            \"type\": \"my_module.MyCell\" # module path\n            \"type\": tf.nn.rnn_cell.GRUCell # class\n            \"type\": BasicRNNCell(num_units=100) # cell instance\n            \"type\": MyCell(...) 
# cell instance\n\n    \"kwargs\" : dict\n        Keyword arguments for the constructor of the cell class.\n        A cell is created by :python:`cell_class(**kwargs)`, where\n        `cell_class` is specified in \"type\" above.\n\n        Ignored if \"type\" is a cell instance.\n\n    \"num_layers\" : int\n        Number of cell layers. Each layer is a cell created as above, with\n        the same hyperparameters specified in \"kwargs\".\n\n    \"dropout\" : dict\n        Dropout applied to the cell in **each** layer. See\n        :tf_main:`DropoutWrapper <contrib/rnn/DropoutWrapper>` for details of\n        the hyperparameters. If all \"\\*_keep_prob\" = 1, no dropout is applied.\n\n        Specifically, if \"variational_recurrent\" = `True`,\n        the same dropout mask is applied across all time steps per run call.\n        If `True`, \"input_size\" is required, which is a list of input\n        size of each cell layer. The input size of a cell layer is the last\n        dimension size of its input tensor. For example, the\n        input size of the first layer is usually the dimension of\n        word embeddings, while the input size of subsequent layers\n        are usually the `num_units` of the preceding-layer cell. E.g.,\n\n        .. code-block:: python\n\n            # Assume embedding_dim = 100\n            \"type\": \"LSTMCell\",\n            \"kwargs\": { \"num_units\": 123 },\n            \"num_layers\": 3,\n            \"dropout\": {\n                \"output_keep_prob\": 0.5,\n                \"variational_recurrent\": True,\n                \"input_size\": [100, 123, 123]\n            }\n\n    \"residual\" : bool\n        If `True`, apply residual connection on the inputs and\n        outputs of cell in **each** layer except the first layer. Ignored\n        if \"num_layers\" = 1.\n\n    \"highway\" : bool\n        If True, apply highway connection on the inputs and\n        outputs of cell in each layer except the first layer. 
Ignored if\n        \"num_layers\" = 1.\n    \"\"\"\n    return {\n        \"type\": \"LSTMCell\",\n        \"kwargs\": {\n            \"num_units\": 256,\n        },\n        \"num_layers\": 1,\n        \"dropout\": {\n            \"input_keep_prob\": 1.0,\n            \"output_keep_prob\": 1.0,\n            \"state_keep_prob\": 1.0,\n            \"variational_recurrent\": False,\n            \"input_size\": [],\n            \"@no_typecheck\": [\n                \"input_keep_prob\", \"output_keep_prob\", \"state_keep_prob\"\n            ]\n        },\n        \"residual\": False,\n        \"highway\": False,\n        \"@no_typecheck\": [\"type\"]\n    }\n\ndef get_rnn_cell(hparams=None, mode=None):\n    \"\"\"Creates an RNN cell.\n\n    See :func:`~texar.core.default_rnn_cell_hparams` for all\n    hyperparameters and default values.\n\n    Args:\n        hparams (dict or HParams, optional): Cell hyperparameters. Missing\n            hyperparameters are set to default values.\n        mode (optional): A Tensor taking value in\n            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n            `TRAIN`, `EVAL`, and `PREDICT`. If `None`, dropout will be\n            controlled by :func:`texar.global_mode`.\n\n    Returns:\n        A cell instance.\n\n    Raises:\n        ValueError: If hparams[\"num_layers\"]>1 and hparams[\"type\"] is a class\n            instance.\n        ValueError: The cell is not an\n            :tf_main:`RNNCell <contrib/rnn/RNNCell>` instance.\n    \"\"\"\n    if hparams is None or isinstance(hparams, dict):\n        hparams = HParams(hparams, default_rnn_cell_hparams())\n\n    d_hp = hparams[\"dropout\"]\n    if d_hp[\"variational_recurrent\"] and \\\n            len(d_hp[\"input_size\"]) != hparams[\"num_layers\"]:\n        raise ValueError(\n            \"If variational_recurrent=True, input_size must be a list of \"\n            \"num_layers(%d) integers. 
Got len(input_size)=%d.\" %\n            (hparams[\"num_layers\"], len(d_hp[\"input_size\"])))\n\n    cells = []\n    cell_kwargs = hparams[\"kwargs\"].todict()\n    num_layers = hparams[\"num_layers\"]\n    for layer_i in range(num_layers):\n        # Create the basic cell\n        cell_type = hparams[\"type\"]\n        if not is_str(cell_type) and not isinstance(cell_type, type):\n            if num_layers > 1:\n                raise ValueError(\n                    \"If 'num_layers'>1, then 'type' must be a cell class or \"\n                    \"its name/module path, rather than a cell instance.\")\n        cell_modules = ['tensorflow.nn.rnn_cell', 'tensorflow.contrib.rnn',\n                        'texar.custom']\n        cell = utils.check_or_get_instance(\n            cell_type, cell_kwargs, cell_modules, rnn.RNNCell)\n\n        # Optionally add dropout\n        if d_hp[\"input_keep_prob\"] < 1.0 or \\\n                d_hp[\"output_keep_prob\"] < 1.0 or \\\n                d_hp[\"state_keep_prob\"] < 1.0:\n            vr_kwargs = {}\n            if d_hp[\"variational_recurrent\"]:\n                vr_kwargs = {\n                    \"variational_recurrent\": True,\n                    \"input_size\": d_hp[\"input_size\"][layer_i],\n                    \"dtype\": tf.float32\n                }\n            input_keep_prob = switch_dropout(d_hp[\"input_keep_prob\"],\n                                             mode)\n            output_keep_prob = switch_dropout(d_hp[\"output_keep_prob\"],\n                                              mode)\n            state_keep_prob = switch_dropout(d_hp[\"state_keep_prob\"],\n                                             mode)\n            cell = rnn.DropoutWrapper(\n                cell=cell,\n                input_keep_prob=input_keep_prob,\n                output_keep_prob=output_keep_prob,\n                state_keep_prob=state_keep_prob,\n                **vr_kwargs)\n\n        # Optionally add residual and highway 
connections\n        if layer_i > 0:\n            if hparams[\"residual\"]:\n                cell = rnn.ResidualWrapper(cell)\n            if hparams[\"highway\"]:\n                cell = rnn.HighwayWrapper(cell)\n\n        cells.append(cell)\n\n    if hparams[\"num_layers\"] > 1:\n        cell = rnn.MultiRNNCell(cells)\n    else:\n        cell = cells[0]\n\n    return cell\n\ndef get_rnn_cell_trainable_variables(cell):\n    \"\"\"Returns the list of trainable variables of an RNN cell.\n\n    Args:\n        cell: an instance of :tf_main:`RNNCell <nn/rnn_cell/RNNCell>`.\n\n    Returns:\n        list: trainable variables of the cell.\n    \"\"\"\n    cell_ = cell\n    while True:\n        try:\n            return cell_.trainable_variables\n        except AttributeError:\n        # Cell wrappers (e.g., `DropoutWrapper`) cannot directly access to\n        # `trainable_variables` as they don't initialize superclass\n        # (tf==v1.3). So try to access through the cell in the wrapper.\n            cell_ = cell._cell  # pylint: disable=protected-access\n\ndef default_regularizer_hparams():\n    \"\"\"Returns the hyperparameters and their default values of a variable\n    regularizer:\n\n    .. 
code-block:: python\n\n        {\n            \"type\": \"L1L2\",\n            \"kwargs\": {\n                \"l1\": 0.,\n                \"l2\": 0.\n            }\n        }\n\n    The default value corresponds to :tf_main:`L1L2 <keras/regularizers/L1L2>`\n    and, with `(l1=0, l2=0)`, disables regularization.\n    \"\"\"\n    return {\n        \"type\": \"L1L2\",\n        \"kwargs\": {\n            \"l1\": 0.,\n            \"l2\": 0.\n        }\n    }\n\ndef get_regularizer(hparams=None):\n    \"\"\"Returns a variable regularizer instance.\n\n    See :func:`~texar.core.default_regularizer_hparams` for all\n    hyperparameters and default values.\n\n    The \"type\" field can be a subclass\n    of :tf_main:`Regularizer <keras/regularizers/Regularizer>`, its string name\n    or module path, or a class instance.\n\n    Args:\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparameters are set to default values.\n\n    Returns:\n        A :tf_main:`Regularizer <keras/regularizers/Regularizer>` instance.\n        `None` if :attr:`hparams` is `None` or taking the default\n        hyperparameter value.\n\n    Raises:\n        ValueError: The resulting regularizer is not an instance of\n            :tf_main:`Regularizer <keras/regularizers/Regularizer>`.\n    \"\"\"\n    if hparams is None:\n        return None\n\n    if isinstance(hparams, dict):\n        hparams = HParams(hparams, default_regularizer_hparams())\n\n    rgl = utils.check_or_get_instance(\n        hparams.type, hparams.kwargs.todict(),\n        [\"tensorflow.keras.regularizers\", \"texar.custom\"])\n\n    if not isinstance(rgl, tf.keras.regularizers.Regularizer):\n        raise ValueError(\"The regularizer must be an instance of \"\n                         \"tf.keras.regularizers.Regularizer.\")\n\n    if isinstance(rgl, tf.keras.regularizers.L1L2) and \\\n            rgl.l1 == 0. 
and rgl.l2 == 0.:\n        return None\n\n    return rgl\n\ndef get_initializer(hparams=None):\n    \"\"\"Returns an initializer instance.\n\n    .. role:: python(code)\n       :language: python\n\n    Args:\n        hparams (dict or HParams, optional): Hyperparameters with the structure\n\n            .. code-block:: python\n\n                {\n                    \"type\": \"initializer_class_or_function\",\n                    \"kwargs\": {\n                        #...\n                    }\n                }\n\n            The \"type\" field can be a initializer class, its name or module\n            path, or class instance. If class name is provided, the class must\n            be from one the following modules:\n            :tf_main:`tf.initializers <initializers>`,\n            :tf_main:`tf.keras.initializers <keras/initializers>`,\n            :tf_main:`tf < >`, and :mod:`texar.custom`. The class is created\n            by :python:`initializer_class(**kwargs)`. If a class instance\n            is given, \"kwargs\" is ignored and can be omitted.\n\n            Besides, the \"type\" field can also be an initialization function\n            called with :python:`initialization_fn(**kwargs)`. In this case\n            \"type\" can be the function, or its name or module path. If\n            function name is provided, the function must be from one of the\n            above modules or module `tf.contrib.layers`. If no\n            keyword argument is required, \"kwargs\" can be omitted.\n\n    Returns:\n        An initializer instance. 
`None` if :attr:`hparams` is `None`.\n    \"\"\"\n    if hparams is None:\n        return None\n\n    kwargs = hparams.get(\"kwargs\", {})\n    if isinstance(kwargs, HParams):\n        kwargs = kwargs.todict()\n    modules = [\"tensorflow.initializers\", \"tensorflow.keras.initializers\",\n               \"tensorflow\", \"texar.custom\"]\n    try:\n        initializer = utils.check_or_get_instance(hparams[\"type\"], kwargs,\n                                                  modules)\n    except TypeError:\n        modules += ['tensorflow.contrib.layers']\n        initializer_fn = utils.get_function(hparams[\"type\"], modules)\n        initializer = initializer_fn(**kwargs)\n\n    return initializer\n\ndef get_activation_fn(fn_name=\"identity\", kwargs=None):\n    \"\"\"Returns an activation function `fn` with the signature\n    `output = fn(input)`.\n\n    If the function specified by :attr:`fn_name` has more than one arguments\n    without default values, then all these arguments except the input feature\n    argument must be specified in :attr:`kwargs`. Arguments with default values\n    can also be specified in :attr:`kwargs` to take values other than the\n    defaults. In this case a partial function is returned with the above\n    signature.\n\n    Args:\n        fn_name (str or callable): An activation function, or its name or\n            module path. The function can be:\n\n            - Built-in function defined in :tf_main:`tf < >` or \\\n            :tf_main:`tf.nn <nn>`, e.g., :tf_main:`tf.identity <identity>`.\n            - User-defined activation functions in module :mod:`texar.custom`.\n            - External activation functions. Must provide the full module path,\\\n              e.g., \"my_module.my_activation_fn\".\n\n        kwargs (optional): A `dict` or instance of :class:`~texar.HParams`\n            containing the keyword arguments of the activation function.\n\n    Returns:\n        An activation function. 
`None` if :attr:`fn_name` is `None`.\n    \"\"\"\n    if fn_name is None:\n        return None\n\n    fn_modules = ['tensorflow', 'tensorflow.nn', 'texar.custom', 'texar.core.layers']\n    activation_fn_ = utils.get_function(fn_name, fn_modules)\n    activation_fn = activation_fn_\n\n    # Make a partial function if necessary\n    if kwargs is not None:\n        if isinstance(kwargs, HParams):\n            kwargs = kwargs.todict()\n        def _partial_fn(features):\n            return activation_fn_(features, **kwargs)\n        activation_fn = _partial_fn\n\n    return activation_fn\n\n\ndef get_constraint_fn(fn_name=\"NonNeg\"):\n    \"\"\"Returns a constraint function.\n\n    .. role:: python(code)\n       :language: python\n\n    The function must follow the signature:\n    :python:`w_ = constraint_fn(w)`.\n\n    Args:\n        fn_name (str or callable): The name or full path to a\n            constraint function, or the function itself.\n\n            The function can be:\n\n            - Built-in constraint functions defined in modules \\\n            :tf_main:`tf.keras.constraints <keras/constraints>` \\\n            (e.g., :tf_main:`NonNeg <keras/constraints/NonNeg>`) \\\n            or :tf_main:`tf < >` or :tf_main:`tf.nn <nn>` \\\n            (e.g., activation functions).\n            - User-defined function in :mod:`texar.custom`.\n            - Externally defined function. Must provide the full path, \\\n            e.g., `\"my_module.my_constraint_fn\"`.\n\n            If a callable is provided, then it is returned directly.\n\n    Returns:\n        The constraint function. 
`None` if :attr:`fn_name` is `None`.\n    \"\"\"\n    if fn_name is None:\n        return None\n\n    fn_modules = ['tensorflow.keras.constraints', 'tensorflow',\n                  'tensorflow.nn', 'texar.custom']\n    constraint_fn = utils.get_function(fn_name, fn_modules)\n    return constraint_fn\n\ndef get_layer(hparams):\n    \"\"\"Makes a layer instance.\n\n    The layer must be an instance of :tf_main:`tf.layers.Layer <layers/Layer>`.\n\n    Args:\n        hparams (dict or HParams): Hyperparameters of the layer, with\n            structure:\n\n            .. code-block:: python\n\n                {\n                    \"type\": \"LayerClass\",\n                    \"kwargs\": {\n                        # Keyword arguments of the layer class\n                        # ...\n                    }\n                }\n\n            Here:\n\n            \"type\" : str or layer class or layer instance\n                The layer type. This can be\n\n                - The string name or full module path of a layer class. If \\\n                the class name is provided, the class must be in module \\\n                :tf_main:`tf.layers <layers>`, :mod:`texar.core`, \\\n                or :mod:`texar.custom`.\n                - A layer class.\n                - An instance of a layer class.\n\n                For example\n\n                .. code-block:: python\n\n                    \"type\": \"Conv1D\" # class name\n                    \"type\": \"texar.core.MaxReducePooling1D\" # module path\n                    \"type\": \"my_module.MyLayer\" # module path\n                    \"type\": tf.layers.Conv2D # class\n                    \"type\": Conv1D(filters=10, kernel_size=2) # cell instance\n                    \"type\": MyLayer(...) # cell instance\n\n            \"kwargs\" : dict\n                A dictionary of keyword arguments for constructor of the\n                layer class. 
Ignored if :attr:`\"type\"` is a layer instance.\n\n                - Arguments named \"activation\" can be a callable, \\\n                or a `str` of \\\n                the name or module path to the activation function.\n                - Arguments named \"\\*_regularizer\" and \"\\*_initializer\" \\\n                can be a class instance, or a `dict` of \\\n                hyperparameters of \\\n                respective regularizers and initializers. See\n                - Arguments named \"\\*_constraint\" can be a callable, or a \\\n                `str` of the name or full path to the constraint function.\n\n    Returns:\n        A layer instance. If hparams[\"type\"] is a layer instance, returns it\n        directly.\n\n    Raises:\n        ValueError: If :attr:`hparams` is `None`.\n        ValueError: If the resulting layer is not an instance of\n            :tf_main:`tf.layers.Layer <layers/Layer>`.\n    \"\"\"\n    if hparams is None:\n        raise ValueError(\"`hparams` must not be `None`.\")\n\n    layer_type = hparams[\"type\"]\n    if not is_str(layer_type) and not isinstance(layer_type, type):\n        layer = layer_type\n    else:\n        layer_modules = [\"tensorflow.layers\", \"texar.core\", \"texar.costum\"]\n        layer_class = utils.check_or_get_class(layer_type, layer_modules)\n        if isinstance(hparams, dict):\n            default_kwargs = _layer_class_to_default_kwargs_map.get(layer_class,\n                                                                    {})\n            default_hparams = {\"type\": layer_type, \"kwargs\": default_kwargs}\n            hparams = HParams(hparams, default_hparams)\n\n        kwargs = {}\n        for k, v in hparams.kwargs.items():\n            if k.endswith('_regularizer'):\n                kwargs[k] = get_regularizer(v)\n            elif k.endswith('_initializer'):\n                kwargs[k] = get_initializer(v)\n            elif k.endswith('activation'):\n                kwargs[k] = 
get_activation_fn(v)\n            elif k.endswith('_constraint'):\n                kwargs[k] = get_constraint_fn(v)\n            else:\n                kwargs[k] = v\n        layer = utils.get_instance(layer_type, kwargs, layer_modules)\n\n    if not isinstance(layer, tf.layers.Layer):\n        raise ValueError(\"layer must be an instance of `tf.layers.Layer`.\")\n\n    return layer\n\n\ndef _compute_concat_output_shape(input_shape, axis):\n    \"\"\"Infers the output shape of concat given the input shape.\n\n    The code is adapted from the ConcatLayer of lasagne\n    (https://github.com/Lasagne/Lasagne/blob/master/lasagne/layers/merge.py)\n\n    Args:\n        input_shape (list): A list of shapes, each of which is in turn a\n            list or TensorShape.\n        axis (int): Axis of the concat operation.\n\n    Returns:\n        list: Output shape of concat.\n    \"\"\"\n    # The size of each axis of the output shape equals the first\n    # input size of respective axis that is not `None`\n    input_shape = [tf.TensorShape(s).as_list() for s in input_shape]\n    output_shape = [next((s for s in sizes if s is not None), None)\n                    for sizes in zip(*input_shape)]\n    axis_sizes = [s[axis] for s in input_shape]\n    concat_axis_size = None if any(s is None for s in axis_sizes) \\\n            else sum(axis_sizes)\n    output_shape[axis] = concat_axis_size\n    return output_shape\n\nclass _ReducePooling1D(tf.layers.Layer):\n    \"\"\"Pooling layer for arbitrary reduce functions for 1D inputs.\n\n    The same as `tf.python.layers.pooling._Pooling1D` except that the pooling\n    dimension is entirely reduced (i.e., `pool_size=length`).\n\n    This class is for code reuse, rather than an exposed API.\n    \"\"\"\n    def __init__(self, reduce_function, data_format='channels_last',\n                 name=None, **kwargs):\n        super(_ReducePooling1D, self).__init__(name=name, **kwargs)\n        self._reduce_function = reduce_function\n        if 
data_format not in {'channels_last', 'channels_first'}:\n            raise ValueError(\"`data_format must be either 'channels_last' or` \"\n                             \"'channels_first'. Got: {}\".format(data_format))\n        self._data_format = data_format\n\n    def compute_output_shape(self, input_shape):\n        input_shape = tf.TensorShape(input_shape).as_list()\n        if self._data_format == 'channels_last':\n            return tf.TensorShape([input_shape[0], input_shape[2]])\n        else:\n            return tf.TensorShape([input_shape[0], input_shape[1]])\n\n    def call(self, inputs):\n        if self._data_format == 'channels_last':\n            return self._reduce_function(inputs, axis=1)\n        else:\n            return self._reduce_function(inputs, axis=2)\n\nclass MaxReducePooling1D(_ReducePooling1D):\n    \"\"\"A subclass of :tf_main:`tf.layers.Layer <layers/Layer>`.\n    Max Pooling layer for 1D inputs. The same as\n    :tf_main:`MaxPooling1D <layers/MaxPooling1D>` except that the pooling\n    dimension is entirely reduced (i.e., `pool_size=input_length`).\n    \"\"\"\n    def __init__(self, data_format='channels_last', name=None, **kwargs):\n        super(MaxReducePooling1D, self).__init__(\n            tf.reduce_max, data_format=data_format, name=name, **kwargs)\n\nclass AverageReducePooling1D(_ReducePooling1D):\n    \"\"\"A subclass of :tf_main:`tf.layers.Layer <layers/Layer>`.\n    Average Pooling layer for 1D inputs. 
The same as\n    :tf_main:`AveragePooling1D <layers/AveragePooling1D>` except that the\n    pooling dimension is entirely reduced (i.e., `pool_size=input_length`).\n    \"\"\"\n    def __init__(self, data_format='channels_last', name=None, **kwargs):\n        super(AverageReducePooling1D, self).__init__(\n            tf.reduce_mean, data_format=data_format, name=name, **kwargs)\n\n_POOLING_TO_REDUCE = {\n    \"MaxPooling1D\": \"MaxReducePooling1D\",\n    \"AveragePooling1D\": \"AverageReducePooling1D\",\n    tf.layers.MaxPooling1D: MaxReducePooling1D,\n    tf.layers.AveragePooling1D: AverageReducePooling1D\n}\n\ndef get_pooling_layer_hparams(hparams):\n    \"\"\"Creates pooling layer hparams `dict` usable for :func:`get_layer`.\n\n    If the :attr:`hparams` sets `'pool_size'` to `None`, the layer will be\n    changed to the respective reduce-pooling layer. For example,\n    :class:`tf.layers.MaxPooling1D <layers/MaxPooling1D>` is replaced with\n    :class:`~texar.core.MaxReducePooling1D`.\n    \"\"\"\n    if isinstance(hparams, HParams):\n        hparams = hparams.todict()\n\n    new_hparams = copy.copy(hparams)\n    kwargs = new_hparams.get('kwargs', None)\n\n    if kwargs and kwargs.get('pool_size', None) is None:\n        pool_type = hparams['type']\n        new_hparams['type'] = _POOLING_TO_REDUCE.get(pool_type, pool_type)\n        kwargs.pop('pool_size', None)\n        kwargs.pop('strides', None)\n        kwargs.pop('padding', None)\n\n    return new_hparams\n\nclass MergeLayer(tf.layers.Layer):\n    \"\"\"A subclass of :tf_main:`tf.layers.Layer <layers/Layer>`.\n    A layer that consists of multiple layers in parallel. 
Input is fed to\n    each of the parallel layers, and the outputs are merged with a\n    specified mode.\n\n    Args:\n        layers (list, optional): A list of :tf_main:`tf.layers.Layer\n            <layers/layer>` instances, or a list of hyperparameter dicts\n            each of which specifies type and kwargs of each layer (see\n            the `hparams` argument of :func:`get_layer`).\n\n            If `None`, this layer degenerates to a merging operator that merges\n            inputs directly.\n        mode (str): Mode of the merge op. This can be:\n\n            - :attr:`'concat'`: Concatenates layer outputs along one axis. \\\n              Tensors must have the same shape except for the dimension \\\n              specified in `axis`, which can have different sizes.\n            - :attr:`'elemwise_sum'`: Outputs element-wise sum.\n            - :attr:`'elemwise_mul'`: Outputs element-wise product.\n            - :attr:`'sum'`: Computes the sum of layer outputs along the \\\n              dimension given by `axis`. 
E.g., given `axis=1`, \\\n              two tensors of shape `[a, b]` and `[a, c]` respectively \\\n              will result in a merged tensor of shape `[a]`.\n            - :attr:`'mean'`: Computes the mean of layer outputs along the \\\n              dimension given in `axis`.\n            - :attr:`'prod'`: Computes the product of layer outputs along the \\\n              dimension given in `axis`.\n            - :attr:`'max'`: Computes the maximum of layer outputs along the \\\n              dimension given in `axis`.\n            - :attr:`'min'`: Computes the minimum of layer outputs along the \\\n              dimension given in `axis`.\n            - :attr:`'and'`: Computes the `logical and` of layer outputs along \\\n              the dimension given in `axis`.\n            - :attr:`'or'`: Computes the `logical or` of layer outputs along \\\n              the dimension given in `axis`.\n            - :attr:`'logsumexp'`: Computes \\\n              log(sum(exp(elements across the dimension of layer outputs)))\n        axis (int): The axis to use in merging. 
Ignored in modes\n            :attr:`'elemwise_sum'` and :attr:`'elemwise_mul'`.\n        trainable (bool): Whether the layer should be trained.\n        name (str, optional): Name of the layer.\n    \"\"\"\n\n    def __init__(self,\n                 layers=None,\n                 mode='concat',\n                 axis=1,\n                 trainable=True,\n                 name=None,\n                 **kwargs):\n        super(MergeLayer, self).__init__(\n            trainable=trainable, name=name, **kwargs)\n        self._mode = mode\n        self._axis = axis\n\n        self._layers = None\n        if layers is not None:\n            if len(layers) == 0:\n                raise ValueError(\n                    \"'layers' must be either None or a non-empty list.\")\n            self._layers = []\n            for layer in layers:\n                if isinstance(layer, tf.layers.Layer):\n                    self._layers.append(layer)\n                else:\n                    self._layers.append(get_layer(hparams=layer))\n\n        # Keep tracks of whether trainable variables have been created\n        self._vars_built = False\n\n    def compute_output_shape(self, input_shape):\n        if self._layers is None:\n            _shapes = input_shape\n            if not isinstance(_shapes, (list, tuple)):\n                _shapes = [_shapes]\n        else:\n            _shapes = []\n            for layer in self._layers:\n                layer_output_shape = layer.compute_output_shape(input_shape)\n                _shapes.append(layer_output_shape)\n        _shapes = [tf.TensorShape(s) for s in _shapes]\n\n        if self._mode == 'concat':\n            output_shape = _compute_concat_output_shape(_shapes, self._axis)\n        elif self._mode in ['sum', 'mean', 'prod', 'max', 'min',\n                            'and', 'or', 'logsumexp']:\n            output_shape = _compute_concat_output_shape(_shapes, self._axis)\n            output_shape.pop(self._axis)\n        elif 
self._mode in ['elemwise_sum', 'elemwise_mul']:\n            # Simply infer the output shape as the input shape of highest rank\n            _ranks = [s.ndims for s in _shapes]\n            max_rank = max(_ranks)\n            max_ranked_shapes = []\n            for i, s in enumerate(_shapes):\n                if _ranks[i] == max_rank:\n                    max_ranked_shapes.append(s.as_list())\n            # Grab the first size of each axis that is not `None`\n            output_shape = [next((s for s in sizes if s is not None), None)\n                            for sizes in zip(*max_ranked_shapes)]\n        else:\n            raise ValueError(\"Unknown merge mode: '%s'\" % self._mode)\n\n        return tf.TensorShape(output_shape)\n\n    def _collect_weights(self):\n        \"\"\"Collects (non-)trainable weights of each of the parallel layers.\n        \"\"\"\n        if self._layers is None:\n            pass\n        for layer in self._layers:\n            if self.trainable:\n                add_variable(\n                    layer._trainable_weights, self._trainable_weights)\n            else:\n                add_variable(\n                    layer._trainable_weights, self._non_trainable_weights)\n            add_variable(\n                layer._non_trainable_weights, self._non_trainable_weights)\n\n    def call(self, inputs):\n        if self._layers is None:\n            layer_outputs = inputs\n            if not isinstance(layer_outputs, (list, tuple)):\n                layer_outputs = [layer_outputs]\n        else:\n            layer_outputs = []\n            for layer in self._layers:\n                layer_output = layer(inputs)\n                layer_outputs.append(layer_output)\n\n        if self._mode == 'concat':\n            outputs = tf.concat(values=layer_outputs, axis=self._axis)\n        elif self._mode == 'elemwise_sum':\n            outputs = layer_outputs[0]\n            for i in range(1, len(layer_outputs)):\n                outputs = 
tf.add(outputs, layer_outputs[i])\n        elif self._mode == 'elemwise_mul':\n            outputs = layer_outputs[0]\n            for i in range(1, len(layer_outputs)):\n                outputs = tf.multiply(outputs, layer_outputs[i])\n        elif self._mode == 'sum':\n            _concat = tf.concat(values=layer_outputs, axis=self._axis)\n            outputs = tf.reduce_sum(_concat, axis=self._axis)\n        elif self._mode == 'mean':\n            _concat = tf.concat(values=layer_outputs, axis=self._axis)\n            outputs = tf.reduce_mean(_concat, axis=self._axis)\n        elif self._mode == 'prod':\n            _concat = tf.concat(values=layer_outputs, axis=self._axis)\n            outputs = tf.reduce_prod(_concat, axis=self._axis)\n        elif self._mode == 'max':\n            _concat = tf.concat(values=layer_outputs, axis=self._axis)\n            outputs = tf.reduce_max(_concat, axis=self._axis)\n        elif self._mode == 'min':\n            _concat = tf.concat(values=layer_outputs, axis=self._axis)\n            outputs = tf.reduce_min(_concat, axis=self._axis)\n        elif self._mode == 'and':\n            _concat = tf.concat(values=layer_outputs, axis=self._axis)\n            outputs = tf.reduce_all(_concat, axis=self._axis)\n        elif self._mode == 'or':\n            _concat = tf.concat(values=layer_outputs, axis=self._axis)\n            outputs = tf.reduce_any(_concat, axis=self._axis)\n        elif self._mode == 'logsumexp':\n            _concat = tf.concat(values=layer_outputs, axis=self._axis)\n            outputs = tf.reduce_logsumexp(_concat, axis=self._axis)\n        else:\n            raise ValueError(\"Unknown merge mode: '%s'\" % self._mode)\n\n        if not self.built or not self._vars_built:\n            self._collect_weights()\n            self._vars_built = True\n\n        return outputs\n\n    @property\n    def layers(self):\n        \"\"\"The list of parallel layers.\n        \"\"\"\n        return self._layers\n\n\nclass 
SequentialLayer(tf.layers.Layer):\n    \"\"\"A subclass of :tf_main:`tf.layers.Layer <layers/Layer>`.\n    A layer that consists of multiple layers connected sequentially.\n\n    Args:\n        layers (list): A list of :tf_main:`tf.layers.Layer\n            <layers/layer>` instances, or a list of hyperparameter dicts\n            each of which specifying type and kwargs of each layer (see\n            the `hparams` argument of :func:`get_layer`). The layers are\n            connected sequentially.\n    \"\"\"\n    def __init__(self,\n                 layers,\n                 trainable=True,\n                 name=None,\n                 **kwargs):\n        super(SequentialLayer, self).__init__(\n            trainable=trainable, name=name, **kwargs)\n\n        if len(layers) == 0:\n            raise ValueError(\"'layers' must be a non-empty list.\")\n        self._layers = []\n        for layer in layers:\n            if isinstance(layer, tf.layers.Layer):\n                self._layers.append(layer)\n            else:\n                self._layers.append(get_layer(hparams=layer))\n\n        # Keep tracks of whether trainable variables have been created\n        self._vars_built = False\n\n    def compute_output_shape(self, input_shape):\n        input_shape = tf.TensorShape(input_shape)\n        for layer in self._layers:\n            output_shape = layer.compute_output_shape(input_shape)\n            input_shape = output_shape\n        return output_shape\n\n    def _collect_weights(self):\n        \"\"\"Collects (non-)trainable weights of each of the layers.\n        \"\"\"\n        for layer in self._layers:\n            if self.trainable:\n                add_variable(\n                    layer._trainable_weights, self._trainable_weights)\n            else:\n                add_variable(\n                    layer._trainable_weights, self._non_trainable_weights)\n            add_variable(\n                layer._non_trainable_weights, 
self._non_trainable_weights)\n\n    def call(self, inputs, mode=None): # pylint: disable=arguments-differ\n        training = is_train_mode(mode)\n\n        outputs = inputs\n        for layer in self._layers:\n            if isinstance(layer, tf.layers.Dropout) or \\\n                    isinstance(layer, tf.layers.BatchNormalization):\n                outputs = layer(outputs, training=training)\n            else:\n                outputs = layer(inputs)\n            inputs = outputs\n\n        if not self.built or not self._vars_built:\n            self._collect_weights()\n            self._vars_built = True\n\n        return outputs\n\n    @property\n    def layers(self):\n        \"\"\"The list of layers connected sequentially.\n        \"\"\"\n        return self._layers\n\n\ndef _common_default_conv_dense_kwargs():\n    \"\"\"Returns the default keyword argument values that are common to\n    convolution layers.\n    \"\"\"\n    return {\n        \"activation\": None,\n        \"use_bias\": True,\n        \"kernel_initializer\": {\n            \"type\": \"glorot_uniform_initializer\",\n            \"kwargs\": {}\n        },\n        \"bias_initializer\": {\n            \"type\": \"zeros_initializer\",\n            \"kwargs\": {}\n        },\n        \"kernel_regularizer\": default_regularizer_hparams(),\n        \"bias_regularizer\": default_regularizer_hparams(),\n        \"activity_regularizer\": default_regularizer_hparams(),\n        \"kernel_constraint\": None,\n        \"bias_constraint\": None,\n        \"trainable\": True,\n        \"name\": None\n    }\n\ndef default_conv1d_kwargs():\n    \"\"\"Returns the default keyword argument values of the constructor\n    of 1D-convolution layer class\n    :tf_main:`tf.layers.Conv1D <layers/Conv1D>`.\n\n    .. 
code-block:: python\n\n        {\n            \"filters\": 100,\n            \"kernel_size\": 3,\n            \"strides\": 1,\n            \"padding\": 'valid',\n            \"data_format\": 'channels_last',\n            \"dilation_rate\": 1\n            \"activation\": \"identity\",\n            \"use_bias\": True,\n            \"kernel_initializer\": {\n                \"type\": \"glorot_uniform_initializer\",\n                \"kwargs\": {}\n            },\n            \"bias_initializer\": {\n                \"type\": \"zeros_initializer\",\n                \"kwargs\": {}\n            },\n            \"kernel_regularizer\": {\n                \"type\": \"L1L2\",\n                \"kwargs\": {\n                    \"l1\": 0.,\n                    \"l2\": 0.\n                }\n            },\n            \"bias_regularizer\": {\n                # same as in \"kernel_regularizer\"\n                # ...\n            },\n            \"activity_regularizer\": {\n                # same as in \"kernel_regularizer\"\n                # ...\n            },\n            \"kernel_constraint\": None,\n            \"bias_constraint\": None,\n            \"trainable\": True,\n            \"name\": None\n        }\n    \"\"\"\n    kwargs = _common_default_conv_dense_kwargs()\n    kwargs.update({\n        \"kernel_size\": 3,\n        \"filters\": 100,\n        \"strides\": 1,\n        \"dilation_rate\": 1,\n        \"data_format\": \"channels_last\"\n    })\n    return kwargs\n\ndef default_conv2d_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\ndef default_conv3d_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\ndef default_conv2d_transpose_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\ndef default_conv3d_transpose_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\n\ndef default_dense_kwargs():\n    \"\"\"Returns the default keyword argument values of the constructor\n    of the dense layer class :tf_main:`tf.layers.Dense <layers/Dense>`.\n\n    .. 
code-block:: python\n\n        {\n            \"units\": 256,\n            \"activation\": \"identity\",\n            \"use_bias\": True,\n            \"kernel_initializer\": {\n                \"type\": \"glorot_uniform_initializer\",\n                \"kwargs\": {}\n            },\n            \"bias_initializer\": {\n                \"type\": \"zeros_initializer\",\n                \"kwargs\": {}\n            },\n            \"kernel_regularizer\": {\n                \"type\": \"L1L2\",\n                \"kwargs\": {\n                    \"l1\": 0.,\n                    \"l2\": 0.\n                }\n            },\n            \"bias_regularizer\": {\n                # same as in \"kernel_regularizer\"\n                # ...\n            },\n            \"activity_regularizer\": {\n                # same as in \"kernel_regularizer\"\n                # ...\n            },\n            \"kernel_constraint\": None,\n            \"bias_constraint\": None,\n            \"trainable\": True,\n            \"name\": None\n        }\n    \"\"\"\n    kwargs = _common_default_conv_dense_kwargs()\n    kwargs.update({\n        \"units\": 256\n    })\n    return kwargs\n\ndef default_dropout_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\n    #raise NotImplementedError\ndef default_flatten_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\ndef default_max_pooling1d_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\n    #raise NotImplementedError\ndef default_max_pooling2d_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\n    #raise NotImplementedError\ndef default_max_pooling3d_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\n    #raise NotImplementedError\ndef default_separable_conv2d_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\n    #raise NotImplementedError\ndef default_batch_normalization_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\n    #raise NotImplementedError\ndef default_average_pooling1d_kwargs():\n    \"\"\"TODO\n    \"\"\"\n  
  return {}\n    #raise NotImplementedError\ndef default_average_pooling2d_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\n    #raise NotImplementedError\ndef default_average_pooling3d_kwargs():\n    \"\"\"TODO\n    \"\"\"\n    return {}\n    #raise NotImplementedError\n_layer_class_to_default_kwargs_map = {\n    tf.layers.Conv1D: default_conv1d_kwargs(),\n    tf.layers.Conv2D: default_conv2d_kwargs(),\n    tf.layers.Conv3D: default_conv3d_kwargs(),\n    tf.layers.Conv2DTranspose: default_conv2d_transpose_kwargs(),\n    tf.layers.Conv3DTranspose: default_conv3d_transpose_kwargs(),\n    tf.layers.Dense: default_dense_kwargs(),\n    tf.layers.Dropout: default_dropout_kwargs(),\n    tf.layers.Flatten: default_flatten_kwargs(),\n    tf.layers.MaxPooling1D: default_max_pooling1d_kwargs(),\n    tf.layers.MaxPooling2D: default_max_pooling2d_kwargs(),\n    tf.layers.MaxPooling3D: default_max_pooling3d_kwargs(),\n    tf.layers.SeparableConv2D: default_separable_conv2d_kwargs(),\n    tf.layers.BatchNormalization: default_batch_normalization_kwargs(),\n    tf.layers.AveragePooling1D: default_average_pooling1d_kwargs(),\n    tf.layers.AveragePooling2D: default_average_pooling2d_kwargs(),\n    tf.layers.AveragePooling3D: default_average_pooling3d_kwargs(),\n}\n\ndef layer_normalize(inputs,\n                    scope=None):\n    '''Applies layer normalization. averaging over the last dimension\n    Args:\n        inputs: A tensor with 2 or more dimensions, where the first\n            dimension has `batch_size`.\n        epsilon: A floating number. 
A very small number for preventing\n            ZeroDivision Error.\n        scope: Optional scope for `variable_scope`.\n    Returns:\n        A tensor with the same shape and data dtype as `inputs`.\n    '''\n    return tf.contrib.layers.layer_norm(\n        inputs=inputs, begin_norm_axis=-1, begin_params_axis=-1, scope=scope\n    )\n\n\ndef gelu(input_tensor):\n    \"\"\"Gaussian Error Linear Unit.\n    This is a smoother version of the RELU.\n    Original paper: https://arxiv.org/abs/1606.08415\n    Args:\n      input_tensor: float Tensor to perform activation.\n    Returns:\n      `input_tensor` with the GELU activation applied.\n    \"\"\"\n    cdf = 0.5 * (1.0 + tf.erf(input_tensor / tf.sqrt(2.0)))\n    return input_tensor * cdf\n"
  },
  {
    "path": "texar_repo/texar/core/layers_test.py",
    "content": "#\n\"\"\"\nUnit tests for various layers.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nimport tensorflow as tf\nimport tensorflow.contrib.rnn as rnn\n\nimport texar as tx\nfrom texar import context\nfrom texar.hyperparams import HParams\nfrom texar.core import layers\n\n# pylint: disable=no-member, protected-access, invalid-name\n# pylint: disable=redefined-variable-type\n\nclass GetRNNCellTest(tf.test.TestCase):\n    \"\"\"Tests RNN cell creator.\n    \"\"\"\n\n    def test_get_rnn_cell(self):\n        \"\"\"Tests :func:`texar.core.layers.get_rnn_cell`.\n        \"\"\"\n        emb_dim = 4\n        num_units = 64\n\n        # Given instance\n        hparams = {\n            \"type\": rnn.LSTMCell(num_units)\n        }\n        cell = layers.get_rnn_cell(hparams)\n        self.assertTrue(isinstance(cell, rnn.LSTMCell))\n\n        # Given class\n        hparams = {\n            \"type\": rnn.LSTMCell,\n            \"kwargs\": {\"num_units\": 10}\n        }\n        cell = layers.get_rnn_cell(hparams)\n        self.assertTrue(isinstance(cell, rnn.LSTMCell))\n\n        # Given string, and complex hyperparameters\n        keep_prob_x = tf.placeholder(\n            name='keep_prob', shape=[], dtype=tf.float32)\n        hparams = {\n            \"type\": \"tensorflow.contrib.rnn.GRUCell\",\n            \"kwargs\": {\n                \"num_units\": num_units\n            },\n            \"num_layers\": 2,\n            \"dropout\": {\n                \"input_keep_prob\": 0.8,\n                \"state_keep_prob\": keep_prob_x,\n                \"variational_recurrent\": True,\n                \"input_size\": [emb_dim, num_units]\n            },\n            \"residual\": True,\n            \"highway\": True\n        }\n\n        hparams_ = HParams(hparams, layers.default_rnn_cell_hparams())\n        cell = 
layers.get_rnn_cell(hparams_)\n\n        batch_size = 16\n        inputs = tf.zeros([batch_size, emb_dim], dtype=tf.float32)\n        output, state = cell(inputs,\n                             cell.zero_state(batch_size, dtype=tf.float32))\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n\n            feed_dict = {\n                keep_prob_x: 1.0,\n                context.global_mode(): tf.estimator.ModeKeys.TRAIN\n            }\n            output_, state_ = sess.run([output, state], feed_dict=feed_dict)\n\n            self.assertEqual(output_.shape[0], batch_size)\n            if isinstance(state_, (list, tuple)):\n                self.assertEqual(state_[0].shape[0], batch_size)\n                self.assertEqual(state_[0].shape[1],\n                                 hparams_.kwargs.num_units)\n            else:\n                self.assertEqual(state_.shape[0], batch_size)\n                self.assertEqual(state_.shape[1],\n                                 hparams_.kwargs.num_units)\n\n\n    def test_switch_dropout(self):\n        \"\"\"Tests dropout mode.\n        \"\"\"\n        emb_dim = 4\n        num_units = 64\n        hparams = {\n            \"kwargs\": {\n                \"num_units\": num_units\n            },\n            \"num_layers\": 2,\n            \"dropout\": {\n                \"input_keep_prob\": 0.8,\n            },\n        }\n        mode = tf.placeholder(tf.string)\n        hparams_ = HParams(hparams, layers.default_rnn_cell_hparams())\n        cell = layers.get_rnn_cell(hparams_, mode)\n\n        batch_size = 16\n        inputs = tf.zeros([batch_size, emb_dim], dtype=tf.float32)\n        output, state = cell(inputs,\n                             cell.zero_state(batch_size, dtype=tf.float32))\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            output_train, _ = sess.run(\n                [output, state],\n                
feed_dict={mode: tf.estimator.ModeKeys.TRAIN})\n            self.assertEqual(output_train.shape[0], batch_size)\n            output_test, _ = sess.run(\n                [output, state],\n                feed_dict={mode: tf.estimator.ModeKeys.EVAL})\n            self.assertEqual(output_test.shape[0], batch_size)\n\n\nclass GetActivationFnTest(tf.test.TestCase):\n    \"\"\"Tests :func:`texar.core.layers.get_activation_fn`.\n    \"\"\"\n    def test_get_activation_fn(self):\n        \"\"\"Tests.\n        \"\"\"\n        fn = layers.get_activation_fn()\n        self.assertEqual(fn, tf.identity)\n\n        fn = layers.get_activation_fn('relu')\n        self.assertEqual(fn, tf.nn.relu)\n\n        inputs = tf.random_uniform([64, 100], -5, 20, dtype=tf.int32)\n\n        fn = layers.get_activation_fn('leaky_relu')\n        fn_output = fn(inputs)\n        ref_output = tf.nn.leaky_relu(inputs)\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            fn_output_, ref_output_ = sess.run([fn_output, ref_output])\n            np.testing.assert_array_equal(fn_output_, ref_output_)\n\n        fn = layers.get_activation_fn('leaky_relu', kwargs={'alpha': 0.1})\n        fn_output = fn(inputs)\n        ref_output = tf.nn.leaky_relu(inputs, alpha=0.1)\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            fn_output_, ref_output_ = sess.run([fn_output, ref_output])\n            np.testing.assert_array_equal(fn_output_, ref_output_)\n\n\nclass GetLayerTest(tf.test.TestCase):\n    \"\"\"Tests layer creator.\n    \"\"\"\n    def test_get_layer(self):\n        \"\"\"Tests :func:`texar.core.layers.get_layer`.\n        \"\"\"\n        hparams = {\n            \"type\": \"Conv1D\"\n        }\n        layer = layers.get_layer(hparams)\n        self.assertTrue(isinstance(layer, tf.layers.Conv1D))\n\n        hparams = {\n            \"type\": \"MergeLayer\",\n            \"kwargs\": {\n 
               \"layers\": [\n                    {\"type\": \"Conv1D\"},\n                    {\"type\": \"Conv1D\"}\n                ]\n            }\n        }\n        layer = layers.get_layer(hparams)\n        self.assertTrue(isinstance(layer, tx.core.MergeLayer))\n\n        hparams = {\n            \"type\": tf.layers.Conv1D\n        }\n        layer = layers.get_layer(hparams)\n        self.assertTrue(isinstance(layer, tf.layers.Conv1D))\n\n        hparams = {\n            \"type\": tf.layers.Conv1D(filters=10, kernel_size=2)\n        }\n        layer = layers.get_layer(hparams)\n        self.assertTrue(isinstance(layer, tf.layers.Conv1D))\n\n\nclass ReducePoolingLayerTest(tf.test.TestCase):\n    \"\"\"Tests reduce pooling layer.\n    \"\"\"\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n\n        self._batch_size = 64\n        self._seq_length = 16\n        self._emb_dim = 100\n\n    def test_max_reduce_pooling_layer(self):\n        \"\"\"Tests :class:`texar.core.MaxReducePooling1D`.\n        \"\"\"\n        pool_layer = layers.MaxReducePooling1D()\n\n        inputs = tf.random_uniform(\n            [self._batch_size, self._seq_length, self._emb_dim])\n        output_shape = pool_layer.compute_output_shape(inputs.get_shape())\n        output = pool_layer(inputs)\n        output_reduce = tf.reduce_max(inputs, axis=1)\n        self.assertEqual(output.get_shape(), output_shape)\n        self.assertEqual(output.get_shape(), [self._batch_size, self._emb_dim])\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            output_, output_reduce_ = sess.run([output, output_reduce])\n            np.testing.assert_array_equal(output_, output_reduce_)\n\n    def test_average_reduce_pooling_layer(self):\n        \"\"\"Tests :class:`texar.core.AverageReducePooling1D`.\n        \"\"\"\n        pool_layer = layers.AverageReducePooling1D()\n\n        inputs = tf.random_uniform(\n            
[self._batch_size, self._seq_length, self._emb_dim])\n        output_shape = pool_layer.compute_output_shape(inputs.get_shape())\n        output = pool_layer(inputs)\n        output_reduce = tf.reduce_mean(inputs, axis=1)\n        self.assertEqual(output.get_shape(), output_shape)\n        self.assertEqual(output.get_shape(), [self._batch_size, self._emb_dim])\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            output_, output_reduce_ = sess.run([output, output_reduce])\n            np.testing.assert_array_equal(output_, output_reduce_)\n\nclass MergeLayerTest(tf.test.TestCase):\n    \"\"\"Tests MergeLayer.\n    \"\"\"\n\n    def test_output_shape(self):\n        \"\"\"Tests MergeLayer.compute_output_shape function.\n        \"\"\"\n        input_shapes = [[None, 1, 2], [64, 2, 2], [None, 3, 2]]\n\n        concat_layer = layers.MergeLayer(mode='concat', axis=1)\n        concat_output_shape = concat_layer.compute_output_shape(input_shapes)\n        self.assertEqual(concat_output_shape, [64, 6, 2])\n\n        sum_layer = layers.MergeLayer(mode='sum', axis=1)\n        sum_output_shape = sum_layer.compute_output_shape(input_shapes)\n        self.assertEqual(sum_output_shape, [64, 2])\n\n        input_shapes = [[None, 5, 2], [64, None, 2], [2]]\n        esum_layer = layers.MergeLayer(mode='elemwise_sum')\n        esum_output_shape = esum_layer.compute_output_shape(input_shapes)\n        self.assertEqual(esum_output_shape, [64, 5, 2])\n\n    def test_layer_logics(self):\n        \"\"\"Test the logic of MergeLayer.\n        \"\"\"\n        layers_ = []\n        layers_.append(tf.layers.Conv1D(filters=200, kernel_size=3))\n        layers_.append(tf.layers.Conv1D(filters=200, kernel_size=4))\n        layers_.append(tf.layers.Conv1D(filters=200, kernel_size=5))\n        layers_.append(tf.layers.Dense(200))\n        layers_.append(tf.layers.Dense(200))\n        m_layer = layers.MergeLayer(layers_)\n\n        
inputs = tf.zeros([64, 16, 1024], dtype=tf.float32)\n        outputs = m_layer(inputs)\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_ = sess.run(outputs)\n            self.assertEqual(outputs_.shape[0], 64)\n            self.assertEqual(outputs_.shape[2], 200)\n            self.assertEqual(\n                outputs_.shape,\n                m_layer.compute_output_shape(inputs.shape.as_list()))\n\n    def test_trainable_variables(self):\n        \"\"\"Test the trainable_variables of the layer.\n        \"\"\"\n        layers_ = []\n        layers_.append(tf.layers.Conv1D(filters=200, kernel_size=3))\n        layers_.append(tf.layers.Conv1D(filters=200, kernel_size=4))\n        layers_.append(tf.layers.Conv1D(filters=200, kernel_size=5))\n        layers_.append(tf.layers.Dense(200))\n        layers_.append(tf.layers.Dense(200))\n        m_layer = layers.MergeLayer(layers_)\n\n        inputs = tf.zeros([64, 16, 1024], dtype=tf.float32)\n        _ = m_layer(inputs)\n\n        num_vars = sum([len(layer.trainable_variables) for layer in layers_])\n        self.assertEqual(num_vars, len(m_layer.trainable_variables))\n\nclass SequentialLayerTest(tf.test.TestCase):\n    \"\"\"Tests sequential layer.\n    \"\"\"\n\n    def test_seq_layer(self):\n        \"\"\"Test sequential layer.\n        \"\"\"\n        layers_ = []\n        layers_.append(tf.layers.Dense(100))\n        layers_.append(tf.layers.Dense(200))\n        seq_layer = layers.SequentialLayer(layers_)\n\n        output_shape = seq_layer.compute_output_shape([None, 10])\n        self.assertEqual(output_shape[1].value, 200)\n\n        inputs = tf.zeros([10, 20], dtype=tf.float32)\n        outputs = seq_layer(inputs)\n\n        num_vars = sum([len(layer.trainable_variables) for layer in layers_])\n        self.assertEqual(num_vars, len(seq_layer.trainable_variables))\n\n        with self.test_session() as sess:\n            
sess.run(tf.global_variables_initializer())\n            outputs_ = sess.run(outputs)\n            self.assertEqual(outputs_.shape[0], 10)\n            self.assertEqual(outputs_.shape[1], 200)\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/core/optimization.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious optimization related utilities.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport re\nimport tensorflow as tf\n\nfrom texar.hyperparams import HParams\nfrom texar.utils import utils\n\n# pylint: disable=too-many-arguments, no-member\n\n__all__ = [\n    \"default_optimization_hparams\",\n    \"get_optimizer_fn\",\n    \"get_learning_rate_decay_fn\",\n    \"get_gradient_clip_fn\",\n    \"get_optimizer\",\n    \"get_train_op\",\n    \"AdamWeightDecayOptimizer\",\n]\n\ndef default_optimization_hparams():\n    \"\"\"Returns a `dict` of default hyperparameters of training op\n    and their default values\n\n    .. role:: python(code)\n       :language: python\n\n    .. 
code-block:: python\n\n        {\n            \"optimizer\": {\n                \"type\": \"AdamOptimizer\",\n                \"kwargs\": {\n                    \"learning_rate\": 0.001\n                }\n            },\n            \"learning_rate_decay\": {\n                \"type\": \"\",\n                \"kwargs\": {},\n                \"min_learning_rate\": 0.,\n                \"start_decay_step\": 0,\n                \"end_decay_step\": inf\n            },\n            \"gradient_clip\": {\n                \"type\": \"\",\n                \"kwargs\": {}\n            },\n            \"gradient_noise_scale\": None,\n            \"name\": None\n        }\n\n    Here:\n\n    \"optimizer\" : dict\n        Hyperparameters of a :tf_main:`tf.train.Optimizer <train/Optimizer>`.\n\n        - **\"type\"** specifies the optimizer class. This can be\n\n            - The string name or full module path of an optimizer class. \\\n            If the class name is provided, the class must be in module \\\n            :tf_main:`tf.train <train>`, \\\n            :tf_main:`tf.contrib.opt <contrib/opt>` or :mod:`texar.custom` \\\n            , :mod:`texar.core.optimization`\n            - An optimizer class.\n            - An instance of an optimizer class.\n\n            For example\n\n            .. code-block:: python\n\n                \"type\": \"AdamOptimizer\" # class name\n                \"type\": \"my_module.MyOptimizer\" # module path\n                \"type\": tf.contrib.opt.AdamWOptimizer # class\n                \"type\": my_module.MyOptimizer # class\n                \"type\": GradientDescentOptimizer(learning_rate=0.1) # instance\n                \"type\": MyOptimizer(...) # instance\n\n        - **\"kwargs\"** is a `dict` specifying keyword arguments for creating \\\n        the optimizer class instance, with :python:`opt_class(**kwargs)`. 
\\\n        Ignored if \"type\" is a class instance.\n\n    \"learning_rate_decay\" : dict\n        Hyperparameters of learning rate decay function. The learning rate\n        starts decay from :attr:`\"start_decay_step\"` and keeps unchanged after\n        :attr:`\"end_decay_step\"` or reaching :attr:`\"min_learning_rate\"`.\n\n        The decay function is specified in \"type\" and \"kwargs\".\n\n            - \"type\" can be a decay function or its name or module path. If \\\n            function name is provided, it must be from module \\\n            :tf_main:`tf.train <train>` or :mod:`texar.custom`, \\\n            :mod:`texar.core.optimization`.\n\n            - \"kwargs\" is a `dict` of keyword arguments for the function \\\n            excluding arguments named \"global_step\" and \"learning_rate\".\n\n        The function is called with\n        :python:`lr = decay_fn(learning_rate=lr, global_step=offset_step,\n        **kwargs)`, where `offset_step` is the global step offset as above.\n        The only exception is :tf_main:`tf.train.piecewise_constant\n        <train/piecewise_constant>` which is called with\n        :python:`lr = piecewise_constant(x=offset_step, **kwargs)`.\n\n    \"gradient_clip\" : dict\n        Hyperparameters of gradient clipping. The gradient clipping function\n        takes a list of `(gradients, variables)` tuples and returns a list\n        of `(clipped_gradients, variables)` tuples. Typical examples include\n        :tf_main:`tf.clip_by_global_norm <clip_by_global_norm>`,\n        :tf_main:`tf.clip_by_value <clip_by_value>`,\n        :tf_main:`tf.clip_by_norm <clip_by_norm>`,\n        :tf_main:`tf.clip_by_average_norm <clip_by_average_norm>`, etc.\n\n        \"type\" specifies the gradient clip function, and can be a function,\n        or its name or module path. 
If function name is provided, the\n        function must be from module :tf_main:`tf < >` or :mod:`texar.custom`,\n        :mod:`texar.core.optimization`.\n\n\n        \"kwargs\" specifies keyword arguments to the function, except arguments\n        named \"t\" or \"t_list\".\n\n        The function is called with\n        :python:`clipped_grads(, _) = clip_fn(t_list=grads, **kwargs)`\n        (e.g., for :tf_main:`tf.clip_by_global_norm <clip_by_global_norm>`) or\n        :python:`clipped_grads = [clip_fn(t=grad, **kwargs) for grad in grads]`\n        (e.g., for :tf_main:`tf.clip_by_value <clip_by_value>`).\n\n    \"gradient_noise_scale\" : float, optional\n        Adds 0-mean normal noise scaled by this value to gradient.\n    \"\"\"\n    return {\n        \"optimizer\": {\n            \"type\": \"AdamOptimizer\",\n            \"kwargs\": {\n                \"learning_rate\": 0.001\n            }\n        },\n        \"learning_rate_decay\": {\n            \"type\": \"\",\n            \"kwargs\": {},\n            \"min_learning_rate\": 0.,\n            \"start_decay_step\": 0,\n            \"end_decay_step\": utils.MAX_SEQ_LENGTH,\n        },\n        \"gradient_clip\": {\n            \"type\": \"\",\n            \"kwargs\": {}\n        },\n        \"gradient_noise_scale\": None,\n        # TODO(zhiting): allow module-level control of gradient_multipliers\n        \"name\": None\n    }\n\ndef get_optimizer_fn(hparams=None):\n    \"\"\"Returns a function `optimizer_fn` of making optimizer instance, along\n    with the optimizer class.\n\n    .. 
role:: python(code)\n       :language: python\n\n    The function has the signature\n    :python:`optimizer_fn(learning_rate=None) -> optimizer class instance`\n\n    See the :attr:`\"optimizer\"` field of\n    :meth:`~texar.core.default_optimization_hparams` for all\n    hyperparameters and default values.\n\n    The optimizer class must be a subclass of\n    :tf_main:`tf.train.Optimizer <train/Optimizer>`.\n\n    Args:\n        hparams (dict or HParams, optional): hyperparameters. Missing\n            hyperparameters are set to default values automatically.\n\n    Returns:\n        - If hparams[\"type\"] is a string or optimizer class, returns\\\n        `(optimizer_fn, optimizer class)`,\n\n        - If hparams[\"type\"] is an optimizer instance, returns \\\n        `(the optimizer instance, optimizer class)`\n    \"\"\"\n    if hparams is None or isinstance(hparams, dict):\n        hparams = HParams(\n            hparams, default_optimization_hparams()[\"optimizer\"])\n\n    opt = hparams[\"type\"]\n    if isinstance(opt, tf.train.Optimizer):\n        return opt, type(opt)\n    opt_modules = ['tensorflow.train',\n                   'tensorflow.contrib.opt',\n                   'texar.core.optimization',\n                   'texar.custom']\n    try:\n        opt_class = utils.check_or_get_class(opt, opt_modules,\n                                             tf.train.Optimizer)\n    except TypeError:\n        raise ValueError(\n            \"Unrecognized optimizer. 
Must be string name of the \"\n            \"optimizer class, or the class which is a subclass of \"\n            \"tf.train.Optimizer, or an instance of the subclass of \"\n            \"Optimizer.\")\n\n    def _get_opt(learning_rate=None):\n        opt_kwargs = hparams[\"kwargs\"].todict()\n        fn_args = set(utils.get_args(opt_class.__init__))\n        if 'learning_rate' in fn_args and learning_rate is not None:\n            opt_kwargs[\"learning_rate\"] = learning_rate\n        return opt_class(**opt_kwargs)\n\n    return _get_opt, opt_class\n\ndef get_learning_rate_decay_fn(hparams=None):\n    \"\"\"Creates learning rate decay function based on the hyperparameters.\n\n    See the :attr:`learning_rate_decay` field in\n    :meth:`~texar.core.default_optimization_hparams` for all\n    hyperparameters and default values.\n\n    Args:\n        hparams (dict or HParams, optional): hyperparameters. Missing\n            hyperparameters are set to default values automatically.\n\n    Returns:\n        function or None: If hparams[\"type\"] is specified, returns a\n        function that takes `(learning_rate, step, **kwargs)` and\n        returns a decayed learning rate. 
If\n        hparams[\"type\"] is empty, returns `None`.\n    \"\"\"\n    if hparams is None or isinstance(hparams, dict):\n        hparams = HParams(\n            hparams, default_optimization_hparams()[\"learning_rate_decay\"])\n\n    fn_type = hparams[\"type\"]\n    if fn_type is None or fn_type == \"\":\n        return None\n\n    fn_modules = [\"tensorflow.train\", \"texar.custom\"]\n    decay_fn = utils.get_function(fn_type, fn_modules)\n    fn_kwargs = hparams[\"kwargs\"]\n    if fn_kwargs is HParams:\n        fn_kwargs = fn_kwargs.todict()\n\n    start_step = tf.to_int32(hparams[\"start_decay_step\"])\n    end_step = tf.to_int32(hparams[\"end_decay_step\"])\n\n    def lr_decay_fn(learning_rate, global_step):\n        \"\"\"Learning rate decay function.\n\n        Args:\n            learning_rate (float or Tensor): The original learning rate.\n            global_step (int or scalar int Tensor): optimization step counter.\n\n        Returns:\n            scalar float Tensor: decayed learning rate.\n        \"\"\"\n        offset_global_step = tf.maximum(\n            tf.minimum(tf.to_int32(global_step), end_step) - start_step, 0)\n        if decay_fn == tf.train.piecewise_constant:\n            decayed_lr = decay_fn(x=offset_global_step, **fn_kwargs)\n        else:\n            fn_kwargs_ = {\n                \"learning_rate\": learning_rate,\n                \"global_step\": offset_global_step}\n            fn_kwargs_.update(fn_kwargs)\n            decayed_lr = utils.call_function_with_redundant_kwargs(\n                decay_fn, fn_kwargs_)\n\n            decayed_lr = tf.maximum(decayed_lr, hparams[\"min_learning_rate\"])\n\n        return decayed_lr\n\n    return lr_decay_fn\n\n\ndef get_gradient_clip_fn(hparams=None):\n    \"\"\"Creates a gradient clipping function based on the hyperparameters.\n\n    See the :attr:`gradient_clip` field in\n    :meth:`~texar.core.default_optimization_hparams` for all\n    hyperparameters and default values.\n\n    The 
gradient clipping function takes a list of `(gradients, variables)`\n    tuples and returns a list of `(clipped_gradients, variables)` tuples.\n    Typical examples include\n    :tf_main:`tf.clip_by_global_norm <clip_by_global_norm>`,\n    :tf_main:`tf.clip_by_value <clip_by_value>`,\n    :tf_main:`tf.clip_by_norm <clip_by_norm>`,\n    :tf_main:`tf.clip_by_average_norm <clip_by_average_norm>`, etc.\n\n    Args:\n        hparams (dict or HParams, optional): hyperparameters. Missing\n            hyperparameters are set to default values automatically.\n\n    Returns:\n        function or `None`: If hparams[\"type\"] is specified, returns\n        the respective function. If hparams[\"type\"] is empty,\n        returns `None`.\n    \"\"\"\n    if hparams is None or isinstance(hparams, dict):\n        hparams = HParams(\n            hparams, default_optimization_hparams()[\"gradient_clip\"])\n    fn_type = hparams[\"type\"]\n    if fn_type is None or fn_type == \"\":\n        return None\n\n    fn_modules = [\"tensorflow\", \"texar.custom\"]\n    clip_fn = utils.get_function(fn_type, fn_modules)\n    clip_fn_args = utils.get_args(clip_fn)\n    fn_kwargs = hparams[\"kwargs\"]\n    if isinstance(fn_kwargs, HParams):\n        fn_kwargs = fn_kwargs.todict()\n\n    def grad_clip_fn(grads_and_vars):\n        \"\"\"Gradient clipping function.\n\n        Args:\n            grads_and_vars (list): A list of `(gradients, variables)` tuples.\n\n        Returns:\n            list: A list of `(clipped_gradients, variables)` tuples.\n        \"\"\"\n        grads, vars_ = zip(*grads_and_vars)\n        if clip_fn == tf.clip_by_global_norm:\n            clipped_grads, _ = clip_fn(t_list=grads, **fn_kwargs)\n        elif 't_list' in clip_fn_args:\n            clipped_grads = clip_fn(t_list=grads, **fn_kwargs)\n        elif 't' in clip_fn_args:     # e.g., tf.clip_by_value\n            clipped_grads = [clip_fn(t=grad, **fn_kwargs) for grad in grads]\n\n        return 
list(zip(clipped_grads, vars_))\n\n    return grad_clip_fn\n\ndef _get_static_lr(learning_rate=None, optimizer_class=None, hparams=None):\n    \"\"\"Return the base static learning_rate.\n        A helper function for creating the optimization function.\n    \"\"\"\n    hparams = HParams(hparams, default_optimization_hparams())\n    opt_hparams = hparams['optimizer']\n    if learning_rate is None:\n        learning_rate = opt_hparams[\"kwargs\"].get(\"learning_rate\", None)\n    if learning_rate is None:\n        # Try to get learning_rate from the default value of the\n        # optimizer's argument\n        opt_argspec = utils.get_default_arg_values(optimizer_class.__init__)\n        learning_rate = opt_argspec.get(\"learning_rate\", None)\n    return learning_rate\n\ndef get_optimizer(learning_rate=None, global_step=None, hparams=None):\n\n    \"\"\"Creates an optimizer instance.\n    Args:\n        learning_rate (float or Tensor, optional): If `None`, learning rate\n            specified in :attr:`hparams`, or the default learning rate\n            of the optimizer will be used (if exists).\n        global_step (optional): A scalar int Tensor. Step counter to update on\n            each step unless :attr:`increment_global_step` is `False`.\n            Learning rate decay uses :attr:`global_step`.\n            If `None`, it will be fetched from the default graph (see\n            :tf_main:`tf.train.get_global_step <train/get_global_step>` for\n            more details). If it has not been created, no step will be\n            incremented with each weight update.\n        hparams (dict or HParams, optional): hyperparameters. Missing\n            hyperparameters are set to default values automatically. 
See\n            :func:`~texar.core.default_optimization_hparams` for\n            all hyperparameters and default values.\n\n    Returns:\n        optimizer: the tf.train.Optimizer instance specified in hparams.\n    \"\"\"\n    hparams = HParams(hparams, default_optimization_hparams())\n\n    opt_hparams = hparams[\"optimizer\"]\n    optimizer_fn, optimizer_class = get_optimizer_fn(opt_hparams)\n\n    static_lr = _get_static_lr(learning_rate, optimizer_class, hparams)\n\n    lr_decay_fn = get_learning_rate_decay_fn(hparams[\"learning_rate_decay\"])\n    if lr_decay_fn is not None:\n        learning_rate = lr_decay_fn(learning_rate=static_lr,\n                                    global_step=global_step)\n    else:\n        learning_rate = static_lr\n\n    tf.summary.scalar(\"learning_rate\", learning_rate)\n\n    optimizer = optimizer_fn(learning_rate=learning_rate)\n\n    return optimizer\n\ndef get_train_op(loss, variables=None,\n                 optimizer=None, learning_rate=None,\n                 global_step=None, increment_global_step=True, hparams=None):\n    \"\"\"Creates a training op.\n\n    This is a wrapper of :tf_main:`tf.contrib.layers.optimize_loss\n    <contrib/layers/optimize_loss>`.\n\n    Args:\n        loss: A scalar Tensor representing the loss to minimize.\n        variables (optional): A list of Variables to optimize. If\n            `None`, all trainable variables are used.\n        optimizer (optional): An tf.train.Optimizer instance. If `None`,\n            use the setting in `hparams` to create the optimizer.\n        learning_rate (float or Tensor, optional): If `None`, learning rate\n            specified in :attr:`hparams`, or the default learning rate\n            of the optimizer will be used (if exists).\n        global_step (optional): A scalar int Tensor. 
Step counter to update on\n            each step unless :attr:`increment_global_step` is `False`.\n            Learning rate decay uses :attr:`global_step`.\n            If `None`, it will be fetched from the default graph (see\n            :tf_main:`tf.train.get_global_step <train/get_global_step>` for\n            more details). If it has not been created, no step will be\n            incremented with each weight update.\n        increment_global_step (bool): Whether to increment\n            :attr:`global_step`. This is useful if the :attr:`global_step` is\n            used in multiple training ops per training step (e.g. to optimize\n            different parts of the model) to avoid incrementing\n            :attr:`global_step` more times than necessary.\n        hparams (dict or HParams, optional): hyperparameters. Missing\n            hyperparameters are set to default values automatically. See\n            :func:`~texar.core.default_optimization_hparams` for\n            all hyperparameters and default values.\n\n    Returns:\n        train_op: the operator used for variables optimization.\n    \"\"\"\n    hparams = HParams(hparams, default_optimization_hparams())\n    grad_clip_fn = get_gradient_clip_fn(hparams[\"gradient_clip\"])\n\n    if not isinstance(optimizer, tf.train.Optimizer):\n        opt_hparams = hparams[\"optimizer\"]\n        optimizer_fn, optimizer_class = get_optimizer_fn(opt_hparams)\n        learning_rate = _get_static_lr(learning_rate, optimizer_class, hparams)\n        lr_decay_fn = get_learning_rate_decay_fn(\n            hparams[\"learning_rate_decay\"])\n        train_op = tf.contrib.layers.optimize_loss(\n            loss=loss,\n            global_step=global_step,\n            learning_rate=learning_rate,\n            optimizer=optimizer_fn,\n            gradient_noise_scale=hparams[\"gradient_noise_scale\"],\n            clip_gradients=grad_clip_fn,\n            learning_rate_decay_fn=lr_decay_fn,\n            
variables=variables,\n            name=hparams[\"name\"],\n            increment_global_step=increment_global_step)\n\n    else:\n        train_op = tf.contrib.layers.optimize_loss(\n            loss=loss,\n            global_step=global_step,\n            learning_rate=None,\n            optimizer=optimizer,\n            gradient_noise_scale=hparams[\"gradient_noise_scale\"],\n            clip_gradients=grad_clip_fn,\n            variables=variables,\n            name=hparams[\"name\"],\n            increment_global_step=increment_global_step)\n\n    return train_op\n\nclass AdamWeightDecayOptimizer(tf.train.Optimizer):\n    \"\"\"\n    A basic Adam optimizer that includes \"correct\" L2 weight decay.\n    Copied from the google BERT repo.\n    Except that in `apply_gradient` function, we add the support to increment\n    the passed global step parameter, to make it more compatible to\n    tf.train.Optimizer implementation.\n    \"\"\"\n\n    def __init__(self,\n                 learning_rate,\n                 weight_decay_rate=0.0,\n                 beta_1=0.9,\n                 beta_2=0.999,\n                 epsilon=1e-6,\n                 exclude_from_weight_decay=None,\n                 name=\"AdamWeightDecayOptimizer\"):\n        \"\"\"Constructs a AdamWeightDecayOptimizer.\"\"\"\n        super(AdamWeightDecayOptimizer, self).__init__(False, name)\n\n        self.learning_rate = learning_rate\n        self.weight_decay_rate = weight_decay_rate\n        self.beta_1 = beta_1\n        self.beta_2 = beta_2\n        self.epsilon = epsilon\n        self.exclude_from_weight_decay = exclude_from_weight_decay\n\n    # pylint: disable=too-many-locals\n    def apply_gradients(self, grads_and_vars, global_step=None, name=None):\n        \"\"\"See base class.\"\"\"\n        # pylint: disable=redefined-argument-from-local\n        with tf.name_scope(name, self._name) as name:\n            assignments = []\n            for (grad, param) in grads_and_vars:\n                
if grad is None or param is None:\n                    continue\n\n                param_name = self._get_variable_name(param.name)\n\n                m = tf.get_variable(\n                    name=param_name + \"/adam_m\",\n                    shape=param.shape.as_list(),\n                    dtype=tf.float32,\n                    trainable=False,\n                    initializer=tf.zeros_initializer())\n                v = tf.get_variable(\n                    name=param_name + \"/adam_v\",\n                    shape=param.shape.as_list(),\n                    dtype=tf.float32,\n                    trainable=False,\n                    initializer=tf.zeros_initializer())\n\n                # Standard Adam update.\n                next_m = (tf.multiply(self.beta_1, m)\\\n                          + tf.multiply(1.0 - self.beta_1,\n                                        grad))\n                next_v = (tf.multiply(self.beta_2, v)\\\n                          + tf.multiply(1.0 - self.beta_2,\n                                        tf.square(grad)))\n\n                update = next_m / (tf.sqrt(next_v) + self.epsilon)\n\n                # Just adding the square of the weights to the loss function is\n                # *not* the correct way of using L2 regularization/weight decay\n                # with Adam, since that will interact with the m and v\n                # parameters in strange ways.\n                # Instead we want ot decay the weights in a manner that doesn't\n                # interact with the m/v parameters.\n                # This is equivalent to adding the square\n                # of the weights to the loss with plain (non-momentum) SGD.\n                if self._do_use_weight_decay(param_name):\n                    update += self.weight_decay_rate * param\n\n                update_with_lr = self.learning_rate * update\n\n                next_param = param - update_with_lr\n\n                assignments.extend(\n                    
[param.assign(next_param),\n                     m.assign(next_m),\n                     v.assign(next_v)])\n\n            update_ops = assignments\n            if global_step is None:\n                apply_updates = self._finish(update_ops, name)\n            else:\n                with tf.control_dependencies([self._finish(update_ops,\n                                                           \"update\")]):\n                    with tf.colocate_with(global_step):\n                        apply_updates = tf.assign_add(global_step, 1, name=name)\n\n        return apply_updates\n\n    def _do_use_weight_decay(self, param_name):\n        \"\"\"Whether to use L2 weight decay for `param_name`.\"\"\"\n        if not self.weight_decay_rate:\n            return False\n        if self.exclude_from_weight_decay:\n            for r in self.exclude_from_weight_decay:\n                if re.search(r, param_name) is not None:\n                    return False\n        return True\n\n    def _get_variable_name(self, param_name):\n        \"\"\"Get the variable name from the tensor name.\"\"\"\n        m = re.match(\"^(.*):\\\\d+$\", param_name)\n        if m is not None:\n            param_name = m.group(1)\n        return param_name\n"
  },
  {
    "path": "texar_repo/texar/core/optimization_test.py",
    "content": "#\n\"\"\"\nUnit tests for various optimization related utilities.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nimport tensorflow as tf\n\nimport texar.core.optimization as opt\nfrom texar.utils import utils\n\n\nclass OptimizationTest(tf.test.TestCase):\n    \"\"\"Tests optimization.\n    \"\"\"\n\n    def test_get_optimizer(self):\n        \"\"\"Tests get_optimizer.\n        \"\"\"\n        default_optimizer_fn, optimizer_class = opt.get_optimizer_fn(\n            opt.default_optimization_hparams()[\"optimizer\"])\n        default_optimizer = default_optimizer_fn(1.0)\n        self.assertTrue(optimizer_class, tf.train.Optimizer)\n        self.assertIsInstance(default_optimizer, tf.train.AdamOptimizer)\n\n        hparams = {\n            \"type\": \"MomentumOptimizer\",\n            \"kwargs\": {\n                \"learning_rate\": 0.001,\n                \"momentum\": 0.9,\n                \"use_nesterov\": True\n            }\n        }\n        momentum_optimizer_fn, _ = opt.get_optimizer_fn(hparams)\n        momentum_optimizer = momentum_optimizer_fn()\n        self.assertIsInstance(momentum_optimizer, tf.train.MomentumOptimizer)\n\n        hparams = {\n            \"type\": tf.train.MomentumOptimizer,\n            \"kwargs\": {\n                \"momentum\": 0.9,\n                \"use_nesterov\": True\n            }\n        }\n        momentum_optimizer_fn, _ = opt.get_optimizer_fn(hparams)\n        momentum_optimizer = momentum_optimizer_fn(0.001)\n        self.assertIsInstance(momentum_optimizer, tf.train.MomentumOptimizer)\n\n        hparams = {\n            \"type\": tf.train.MomentumOptimizer(0.001, 0.9)\n        }\n        momentum_optimizer, _ = opt.get_optimizer_fn(hparams)\n        self.assertIsInstance(momentum_optimizer, tf.train.MomentumOptimizer)\n\n\n    def 
test_get_learning_rate_decay_fn(self): # pylint: disable=too-many-locals\n        \"\"\"Tests get_learning_rate_decay_fn.\n        \"\"\"\n        default_lr_decay_fn = opt.get_learning_rate_decay_fn(\n            opt.default_optimization_hparams()[\"learning_rate_decay\"])\n        self.assertIsNone(default_lr_decay_fn)\n\n        boundaries = [2, 4]\n        values = [0.1, 0.01, 0.001]\n        hparams = {\n            \"type\": \"piecewise_constant\",\n            \"kwargs\": {\n                \"boundaries\": boundaries,\n                \"values\": values\n            },\n            \"min_learning_rate\": 0.05,\n            \"start_decay_step\": 1,\n            \"end_decay_step\": utils.MAX_SEQ_LENGTH,\n        }\n        pc_lr_decay_fn = opt.get_learning_rate_decay_fn(hparams)\n\n        global_step = 1\n        pc_lr = pc_lr_decay_fn(learning_rate=1., global_step=global_step)\n        pc_lr_true = tf.train.piecewise_constant(\n            global_step-hparams[\"start_decay_step\"], boundaries, values)\n\n        hparams[\"type\"] = \"natural_exp_decay\"\n        hparams[\"kwargs\"] = {\n            \"decay_steps\": 1,\n            \"decay_rate\": 0.5\n        }\n        ned_lr_decay_fn = opt.get_learning_rate_decay_fn(hparams)\n        ned_lr = ned_lr_decay_fn(learning_rate=1., global_step=global_step)\n        ned_lr_true = tf.train.natural_exp_decay(\n            1., global_step-hparams[\"start_decay_step\"],\n            hparams[\"kwargs\"][\"decay_steps\"], hparams[\"kwargs\"][\"decay_rate\"])\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            pc_lr_, pc_lr_true_, ned_lr_, ned_lr_true_ = sess.run(\n                [pc_lr, pc_lr_true, ned_lr, ned_lr_true])\n            self.assertEqual(pc_lr_, pc_lr_true_)\n            self.assertEqual(ned_lr_, ned_lr_true_)\n\n    def test_get_gradient_clip_fn(self):    # pylint: disable=too-many-locals\n        \"\"\"Tests get_gradient_clip_fn.\n        
\"\"\"\n        default_grad_clip_fn = opt.get_gradient_clip_fn(\n            opt.default_optimization_hparams()[\"gradient_clip\"])\n        self.assertIsNone(default_grad_clip_fn)\n\n        grads = [tf.random_uniform([10, 10], -1., 1.) for _ in range(5)]\n        grads_and_vars = list(zip(grads, range(5)))\n\n        hparams = {\n            \"type\": \"clip_by_global_norm\",\n            \"kwargs\": {\n                \"clip_norm\": 0.1\n            }\n        }\n        gn_grad_clip_fn = opt.get_gradient_clip_fn(hparams)\n        gn_grads_and_vars = gn_grad_clip_fn(grads_and_vars)\n        gn_grads, _ = zip(*gn_grads_and_vars)\n        gn_grads_true, _ = tf.clip_by_global_norm(\n            grads, hparams[\"kwargs\"][\"clip_norm\"])\n\n        hparams = {\n            \"type\": \"clip_by_value\",\n            \"kwargs\": {\n                \"clip_value_min\": -0.01,\n                \"clip_value_max\": 0.01\n            }\n        }\n        v_grad_clip_fn = opt.get_gradient_clip_fn(hparams)\n        v_grads_and_vars = v_grad_clip_fn(grads_and_vars)\n        v_grads, _ = zip(*v_grads_and_vars)\n        v_grads_true = tf.clip_by_value(grads,\n                                        hparams[\"kwargs\"][\"clip_value_min\"],\n                                        hparams[\"kwargs\"][\"clip_value_max\"])\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            gn_grads_, gn_grads_true_, v_grads_, v_grads_true_ = sess.run(\n                [gn_grads, gn_grads_true, v_grads, v_grads_true])\n            np.testing.assert_array_equal(gn_grads_, gn_grads_true_)\n            np.testing.assert_array_equal(v_grads_, v_grads_true_)\n\n    def test_get_train_op(self):\n        \"\"\"Tests get_train_op.\n        \"\"\"\n        var = tf.Variable(0.)\n        loss = tf.nn.l2_loss(var)\n        train_op = opt.get_train_op(loss)\n        self.assertTrue(tf.contrib.framework.is_tensor(train_op))\n\nif __name__ == 
\"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/core/replay_memories.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nClasses and utilities for replay memory in RL.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom collections import deque\nimport random\n\nfrom texar.hyperparams import HParams\n\n__all__ = [\n    \"ReplayMemoryBase\",\n    \"DequeReplayMemory\"\n]\n\nclass ReplayMemoryBase(object):\n    \"\"\"Base class of replay memory inheritted by all replay memory classes.\n\n    Args:\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparameters are set to default values. See\n            :meth:`default_hparams` for the defaults.\n    \"\"\"\n    def __init__(self, hparams=None):\n        self._hparams = HParams(hparams, self.default_hparams())\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a `dict` of hyperparameters and their default values.\n\n        .. 
code-block:: python\n\n            {\n                'name': 'replay_memory'\n            }\n        \"\"\"\n        return {\n            'name': 'replay_memory'\n        }\n\n    def add(self, element):\n        \"\"\"Inserts a memory entry\n        \"\"\"\n        raise NotImplementedError\n\n    def get(self, size):\n        \"\"\"Pops a memory entry.\n        \"\"\"\n        raise NotImplementedError\n\n    def last(self):\n        \"\"\"Returns the latest element in the memeory.\n        \"\"\"\n        raise NotImplementedError\n\n    def size(self):\n        \"\"\"Returns the current size of the memory.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass DequeReplayMemory(ReplayMemoryBase):\n    \"\"\"A deque based replay memory that accepts new memory entry and deletes\n    oldest memory entry if exceeding the capacity. Memory entries are\n    accessed in random order.\n\n    Args:\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparameters are set to default values. See\n            :meth:`default_hparams` for the defaults.\n    \"\"\"\n    def __init__(self, hparams=None):\n        ReplayMemoryBase.__init__(self, hparams)\n        self.deque = deque()\n        self.capacity = self._hparams.capacity\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a `dict` of hyperparameters and their default values.\n\n        .. code-block:: python\n\n            {\n                'capacity': 80000,\n                'name': 'deque_replay_memory',\n            }\n\n        Here:\n\n        \"capacity\" : int\n            Maximum size of memory kept. 
Deletes oldest memories if exceeds\n            the capacity.\n        \"\"\"\n        return {\n            'name': 'deque_replay_memory',\n            'capacity': 80000\n        }\n\n    def add(self, element):\n        \"\"\"Appends element to the memory and deletes old memory if exceeds\n        the capacity.\n        \"\"\"\n        self.deque.append(element)\n        if len(self.deque) > self.capacity:\n            self.deque.popleft()\n\n    #TODO(zhiting): is it okay to have stand alone random generator ?\n    def get(self, size):\n        \"\"\"Randomly samples :attr:`size` entries from the memory. Returns\n        a list.\n        \"\"\"\n        return random.sample(self.deque, size)\n\n    def last(self):\n        \"\"\"Returns the latest element in the memeory.\n        \"\"\"\n        return self.deque[-1]\n\n    def size(self):\n        \"\"\"Returns the current size of the memory.\n        \"\"\"\n        return len(self.deque)\n"
  },
  {
    "path": "texar_repo/texar/data/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library data.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.data.data_utils import *\nfrom texar.data.data import *\nfrom texar.data.data_decoders import *\nfrom texar.data.vocabulary import *\nfrom texar.data.embedding import *\n"
  },
  {
    "path": "texar_repo/texar/data/data/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library data inputs.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.data.data.data_base import *\nfrom texar.data.data.scalar_data import *\nfrom texar.data.data.text_data_base import *\nfrom texar.data.data.mono_text_data import *\nfrom texar.data.data.paired_text_data import *\nfrom texar.data.data.multi_aligned_data import *\nfrom texar.data.data.data_iterators import *\nfrom texar.data.data.dataset_utils import *\n"
  },
  {
    "path": "texar_repo/texar/data/data/data_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase data class that is enherited by all data classes.\nA data defines data reading, parsing, batching, and other\npreprocessing operations.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nfrom texar.hyperparams import HParams\nfrom texar.data.data import dataset_utils as dsutils\nfrom texar.data.data_utils import count_file_lines\n\n__all__ = [\n    \"DataBase\"\n]\n\nclass DataBase(object):\n    \"\"\"Base class inheritted by all data classes.\n    \"\"\"\n\n    def __init__(self, hparams):\n        self._hparams = HParams(hparams, self.default_hparams())\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of default hyperparameters.\n\n        .. 
code-block:: python\n\n            {\n                \"num_epochs\": 1,\n                \"batch_size\": 64,\n                \"allow_smaller_final_batch\": True,\n                \"shuffle\": True,\n                \"shuffle_buffer_size\": None,\n                \"shard_and_shuffle\": False,\n                \"num_parallel_calls\": 1,\n                \"prefetch_buffer_size\": 0,\n                \"max_dataset_size\": -1,\n                \"seed\": None,\n                \"name\": \"data\",\n            }\n\n        Here:\n\n            \"num_epochs\" : int\n                Number of times the dataset should be repeated. An\n                :tf_main:`OutOfRangeError <errors/OutOfRangeError>` signal will\n                be raised after the whole repeated dataset has been iterated\n                through.\n\n                E.g., For training data, set it to 1 (default) so that you\n                will get the signal after each epoch of training. Set to -1\n                to repeat the dataset indefinitely.\n\n            \"batch_size\" : int\n                Batch size, i.e., the number of consecutive elements of the\n                dataset to combine in a single batch.\n\n            \"allow_smaller_final_batch\" : bool\n               Whether to allow the final batch to be smaller if there are\n               insufficient elements left. If `False`, the final batch is\n               discarded if it is smaller than batch size. Note that,\n               if `True`, `output_shapes` of the resulting dataset\n               will have a a **static** batch_size dimension equal to\n               \"batch_size\".\n\n            \"shuffle\" : bool\n                Whether to randomly shuffle the elements of the dataset.\n\n            \"shuffle_buffer_size\" : int\n                The buffer size for data shuffling. 
The larger, the better\n                the resulting data is mixed.\n\n                If `None` (default), buffer size is set to the size of the\n                whole dataset (i.e., make the shuffling the maximally\n                effective).\n\n            \"shard_and_shuffle\" : bool\n                Whether to first shard the dataset and then shuffle each\n                block respectively. Useful when the whole data is too large to\n                be loaded efficiently into the memory.\n\n                If `True`, :attr:`shuffle_buffer_size` must be specified to\n                determine the size of each shard.\n\n            \"num_parallel_calls\" : int\n                Number of elements from the datasets to process in parallel.\n\n            \"prefetch_buffer_size\" : int\n                The maximum number of elements that will be buffered when\n                prefetching.\n\n            max_dataset_size : int\n                Maximum number of instances to include in\n                the dataset. If set to `-1` or greater than the size of\n                dataset, all instances will be included. This constraint is\n                imposed after data shuffling and filtering.\n\n            seed : int, optional\n                The random seed for shuffle.\n\n                Note that if a seed is set, the shuffle order will be exact\n                the same every time when going through the (repeated) dataset.\n\n                For example, consider a dataset with elements [1, 2, 3], with\n                \"num_epochs\"`=2` and some fixed seed, the resulting sequence\n                can be: 2 1 3, 1 3 2 | 2 1 3, 1 3 2, ... 
That is, the orders are\n                different **within** every `num_epochs`, but are the same\n                **across** the `num_epochs`.\n\n            name : str\n                Name of the data.\n        \"\"\"\n        return {\n            \"name\": \"data\",\n            \"num_epochs\": 1,\n            \"batch_size\": 64,\n            \"allow_smaller_final_batch\": True,\n            \"shuffle\": True,\n            \"shuffle_buffer_size\": None,\n            \"shard_and_shuffle\": False,\n            \"num_parallel_calls\": 1,\n            \"prefetch_buffer_size\": 0,\n            \"max_dataset_size\": -1,\n            \"seed\": None\n        }\n\n    @staticmethod\n    def _make_batch(dataset, hparams, padded_batch=False, padding_values=None):\n        dataset = dataset.repeat(hparams.num_epochs)\n        batch_size = hparams[\"batch_size\"]\n        if hparams[\"allow_smaller_final_batch\"]:\n            if padded_batch:\n                dataset = dataset.padded_batch(\n                    batch_size, dataset.output_shapes,\n                    padding_values=padding_values)\n            else:\n                dataset = dataset.batch(batch_size)\n        else:\n            dataset = dataset.apply(\n                tf.contrib.data.padded_batch_and_drop_remainder(\n                    batch_size, dataset.output_shapes,\n                    padding_values=padding_values))\n        return dataset\n\n    @staticmethod\n    def _shuffle_dataset(dataset, hparams, dataset_files):\n        dataset_size = None\n        shuffle_buffer_size = hparams[\"shuffle_buffer_size\"]\n        if hparams[\"shard_and_shuffle\"]:\n            if shuffle_buffer_size is None:\n                raise ValueError(\n                    \"Dataset hyperparameter 'shuffle_buffer_size' \"\n                    \"must not be `None` if 'shard_and_shuffle'=`True`.\")\n            dataset_size = count_file_lines(dataset_files)\n            if shuffle_buffer_size >= dataset_size:\n         
       raise ValueError(\n                    \"Dataset size (%d) <= shuffle_buffer_size (%d). Set \"\n                    \"shuffle_and_shard to `False`.\" %\n                    (dataset_size, shuffle_buffer_size))\n            #TODO(zhiting): Use a different seed?\n            dataset = dataset.apply(dsutils.random_shard_dataset(\n                dataset_size, shuffle_buffer_size, hparams[\"seed\"]))\n            dataset = dataset.shuffle(shuffle_buffer_size + 16, # add a margin\n                                      seed=hparams[\"seed\"])\n        elif hparams[\"shuffle\"]:\n            if shuffle_buffer_size is None:\n                dataset_size = count_file_lines(dataset_files)\n                shuffle_buffer_size = dataset_size\n            dataset = dataset.shuffle(shuffle_buffer_size, seed=hparams[\"seed\"])\n\n        return dataset, dataset_size\n\n    @property\n    def num_epochs(self):\n        \"\"\"Number of epochs.\n        \"\"\"\n        return self._hparams.num_epochs\n\n    @property\n    def batch_size(self):\n        \"\"\"The batch size.\n        \"\"\"\n        return self._hparams.batch_size\n\n    @property\n    def hparams(self):\n        \"\"\"A :class:`~texar.HParams` instance of the\n        data hyperparameters.\n        \"\"\"\n        return self._hparams\n\n    @property\n    def name(self):\n        \"\"\"Name of the module.\n        \"\"\"\n        return self._hparams.name\n\n"
  },
  {
    "path": "texar_repo/texar/data/data/data_iterators.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious data iterator classes.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nimport texar as tx\nfrom texar.utils.variables import get_unique_named_variable_scope\n\n__all__ = [\n    \"DataIteratorBase\",\n    \"DataIterator\",\n    \"TrainTestDataIterator\",\n    \"FeedableDataIterator\",\n    \"TrainTestFeedableDataIterator\"\n]\n\nclass DataIteratorBase(object):\n    \"\"\"Base class for all data iterator classes to inherit. A data iterator\n    is a wrapper of :tf_main:`tf.data.Iterator <data/Iterator>`, and can\n    switch between and iterate through **multiple** datasets.\n\n    Args:\n        datasets: Datasets to iterates through. This can be:\n\n            - A single instance of :tf_main:`tf.data.Dataset <data/Dataset>` \\\n            or instance of subclass of :class:`~texar.data.DataBase`.\n            - A `dict` that maps dataset name to \\\n            instance of :tf_main:`tf.data.Dataset <data/Dataset>` or \\\n            subclass of :class:`~texar.data.DataBase`.\n            - A `list` of instances of subclasses of \\\n            :class:`texar.data.DataBase`. 
The name of instances \\\n            (:attr:`texar.data.DataBase.name`) must be unique.\n    \"\"\"\n\n    def __init__(self, datasets):\n        self._default_dataset_name = 'data'\n        if isinstance(datasets, (tf.data.Dataset, tx.data.DataBase)):\n            datasets = {self._default_dataset_name: datasets}\n        elif isinstance(datasets, (list, tuple)):\n            if any(not isinstance(d, tx.data.DataBase) for d in datasets):\n                raise ValueError(\"`datasets` must be an non-empty list of \"\n                                 \"`texar.data.DataBase` instances.\")\n            num_datasets = len(datasets)\n            datasets = {d.name: d for d in datasets}\n            if len(datasets) < num_datasets:\n                raise ValueError(\"Names of datasets must be unique.\")\n\n        _datasets = {}\n        for k, v in datasets.items(): # pylint: disable=invalid-name\n            _datasets[k] = v if isinstance(v, tf.data.Dataset) else v.dataset\n        self._datasets = _datasets\n\n        if len(self._datasets) <= 0:\n            raise ValueError(\"`datasets` must not be empty.\")\n\n    @property\n    def num_datasets(self):\n        \"\"\"Number of datasets.\n        \"\"\"\n        return len(self._datasets)\n\n    @property\n    def dataset_names(self):\n        \"\"\"A list of dataset names.\n        \"\"\"\n        return list(self._datasets.keys())\n\n\nclass DataIterator(DataIteratorBase):\n    \"\"\"Data iterator that switches and iterates through multiple datasets.\n\n    This is a wrapper of TF reinitializble :tf_main:`iterator <data/Iterator>`.\n\n    Args:\n        datasets: Datasets to iterates through. 
This can be:\n\n            - A single instance of :tf_main:`tf.data.Dataset <data/Dataset>` \\\n            or instance of subclass of :class:`~texar.data.DataBase`.\n            - A `dict` that maps dataset name to \\\n            instance of :tf_main:`tf.data.Dataset <data/Dataset>` or \\\n            subclass of :class:`~texar.data.DataBase`.\n            - A `list` of instances of subclasses of \\\n            :class:`texar.data.DataBase`. The name of instances \\\n            (:attr:`texar.data.DataBase.name`) must be unique.\n\n    Example:\n\n        .. code-block:: python\n\n            train_data = MonoTextData(hparams_train)\n            test_data = MonoTextData(hparams_test)\n            iterator = DataIterator({'train': train_data, 'test': test_data})\n            batch = iterator.get_next()\n\n            sess = tf.Session()\n\n            for _ in range(200): # Run 200 epochs of train/test\n                # Starts iterating through training data from the beginning\n                iterator.switch_to_dataset(sess, 'train')\n                while True:\n                    try:\n                        train_batch_ = sess.run(batch)\n                    except tf.errors.OutOfRangeError:\n                        print(\"End of training epoch.\")\n                # Starts iterating through test data from the beginning\n                iterator.switch_to_dataset(sess, 'test')\n                while True:\n                    try:\n                        test_batch_ = sess.run(batch)\n                    except tf.errors.OutOfRangeError:\n                        print(\"End of test epoch.\")\n    \"\"\"\n\n    def __init__(self, datasets):\n        DataIteratorBase.__init__(self, datasets)\n\n        self._variable_scope = get_unique_named_variable_scope('data_iterator')\n        with tf.variable_scope(self._variable_scope):\n            first_dataset = self._datasets[sorted(self.dataset_names)[0]]\n            self._iterator = 
tf.data.Iterator.from_structure(\n                first_dataset.output_types, first_dataset.output_shapes)\n            self._iterator_init_ops = {\n                name: self._iterator.make_initializer(d)\n                for name, d in self._datasets.items()\n            }\n\n    def switch_to_dataset(self, sess, dataset_name=None):\n        \"\"\"Re-initializes the iterator of a given dataset and starts iterating\n        over the dataset (from the beginning).\n\n        Args:\n            sess: The current tf session.\n            dataset_name (optional): Name of the dataset. If not provided,\n                there must be only one Dataset.\n        \"\"\"\n        if dataset_name is None:\n            if self.num_datasets > 1:\n                raise ValueError(\"`dataset_name` is required if there are \"\n                                 \"more than one datasets.\")\n            dataset_name = next(iter(self._datasets))\n        if dataset_name not in self._datasets:\n            raise ValueError(\"Dataset not found: \", dataset_name)\n        sess.run(self._iterator_init_ops[dataset_name])\n\n    def get_next(self):\n        \"\"\"Returns the next element of the activated dataset.\n        \"\"\"\n        return self._iterator.get_next()\n\nclass TrainTestDataIterator(DataIterator):\n    \"\"\"Data iterator that alternatives between train, val, and test datasets.\n\n    :attr:`train`, :attr:`val`, and :attr:`test` can be instance of\n    either :tf_main:`tf.data.Dataset <data/Dataset>` or subclass of\n    :class:`~texar.data.DataBase`. At least one of them must be provided.\n\n    This is a wrapper of :class:`~texar.data.DataIterator`.\n\n    Args:\n        train (optional): Training data.\n        val (optional): Validation data.\n        test (optional): Test data.\n\n    Example:\n\n        .. 
code-block:: python\n\n            train_data = MonoTextData(hparams_train)\n            val_data = MonoTextData(hparams_val)\n            iterator = TrainTestDataIterator(train=train_data, val=val_data)\n            batch = iterator.get_next()\n\n            sess = tf.Session()\n\n            for _ in range(200): # Run 200 epochs of train/val\n                # Starts iterating through training data from the beginning\n                iterator.switch_to_train_data(sess)\n                while True:\n                    try:\n                        train_batch_ = sess.run(batch)\n                    except tf.errors.OutOfRangeError:\n                        print(\"End of training epoch.\")\n                        break\n                # Starts iterating through val data from the beginning\n                iterator.switch_to_val_data(sess)\n                while True:\n                    try:\n                        val_batch_ = sess.run(batch)\n                    except tf.errors.OutOfRangeError:\n                        print(\"End of val epoch.\")\n                        break\n    \"\"\"\n\n    def __init__(self, train=None, val=None, test=None):\n        dataset_dict = {}\n        self._train_name = 'train'\n        self._val_name = 'val'\n        self._test_name = 'test'\n        if train is not None:\n            dataset_dict[self._train_name] = train\n        if val is not None:\n            dataset_dict[self._val_name] = val\n        if test is not None:\n            dataset_dict[self._test_name] = test\n        if len(dataset_dict) == 0:\n            raise ValueError(\"At least one of `train`, `val`, and `test` \"\n                             \"must be provided.\")\n\n        DataIterator.__init__(self, dataset_dict)\n\n\n    def switch_to_train_data(self, sess):\n        \"\"\"Starts to iterate through training data (from the beginning).\n\n        Args:\n            sess: The current tf session.\n        \"\"\"\n        if self._train_name not in self._datasets:\n            raise ValueError(\"Training 
data not provided.\")\n        self.switch_to_dataset(sess, self._train_name)\n\n    def switch_to_val_data(self, sess):\n        \"\"\"Starts to iterate through val data (from the beginning).\n\n        Args:\n            sess: The current tf session.\n        \"\"\"\n        if self._val_name not in self._datasets:\n            raise ValueError(\"Val data not provided.\")\n        self.switch_to_dataset(sess, self._val_name)\n\n    def switch_to_test_data(self, sess):\n        \"\"\"Starts to iterate through test data (from the beginning).\n\n        Args:\n            sess: The current tf session.\n        \"\"\"\n        if self._test_name not in self._datasets:\n            raise ValueError(\"Test data not provided.\")\n        self.switch_to_dataset(sess, self._test_name)\n\nclass FeedableDataIterator(DataIteratorBase):\n    \"\"\"Data iterator that iterates through **multiple** datasets and switches\n    between datasets.\n\n    The iterator can switch to a dataset and resume from where we\n    left off last time we visited the dataset. This is a wrapper of TF\n    feedable :tf_main:`iterator <data/Iterator>`.\n\n    Args:\n        datasets: Datasets to iterates through. This can be:\n\n            - A single instance of :tf_main:`tf.data.Dataset <data/Dataset>` \\\n            or instance of subclass of :class:`~texar.data.DataBase`.\n            - A `dict` that maps dataset name to \\\n            instance of :tf_main:`tf.data.Dataset <data/Dataset>` or \\\n            subclass of :class:`~texar.data.DataBase`.\n            - A `list` of instances of subclasses of \\\n            :class:`texar.data.DataBase`. The name of instances \\\n            (:attr:`texar.data.DataBase.name`) must be unique.\n\n    Example:\n\n        .. 
code-block:: python\n\n            train_data = MonoTextData(hparams={'num_epochs': 200, ...})\n            test_data = MonoTextData(hparams_test)\n            iterator = FeedableDataIterator({'train': train_data,\n                                             'test': test_data})\n            batch = iterator.get_next()\n\n            sess = tf.Session()\n\n            def _eval_epoch(): # Iterate through test data for one epoch\n                # Initialize and start from beginning of test data\n                iterator.initialize_dataset(sess, 'test')\n                while True:\n                    try:\n                        feed_dict = { # Read from test data\n                            iterator.handle: iterator.get_handle(sess, 'test')\n                        }\n                        test_batch_ = sess.run(batch, feed_dict=feed_dict)\n                    except tf.errors.OutOfRangeError:\n                        print(\"End of test epoch.\")\n                        break\n\n            # Initialize and start from beginning of training data\n            iterator.initialize_dataset(sess, 'train')\n            step = 0\n            while True:\n                try:\n                    feed_dict = { # Read from training data\n                        iterator.handle: iterator.get_handle(sess, 'train')\n                    }\n                    train_batch_ = sess.run(batch, feed_dict=feed_dict)\n\n                    step +=1\n                    if step % 200 == 0: # Evaluate periodically\n                        _eval_epoch()\n                except tf.errors.OutOfRangeError:\n                    print(\"End of training.\")\n                    break\n    \"\"\"\n\n    def __init__(self, datasets):\n        DataIteratorBase.__init__(self, datasets)\n\n        self._variable_scope = get_unique_named_variable_scope(\n            'feedable_data_iterator')\n        with tf.variable_scope(self._variable_scope):\n            self._handle = tf.placeholder(tf.string, shape=[], name='handle')\n            
first_dataset = self._datasets[sorted(self.dataset_names)[0]]\n            self._iterator = tf.data.Iterator.from_string_handle(\n                self._handle, first_dataset.output_types,\n                first_dataset.output_shapes)\n\n            self._dataset_iterators = {\n                name: dataset.make_initializable_iterator()\n                for name, dataset in self._datasets.items()\n            }\n\n    def get_handle(self, sess, dataset_name=None):\n        \"\"\"Returns a dataset handle used to feed the\n        :attr:`handle` placeholder to fetch data from the dataset.\n\n        Args:\n            sess: The current tf session.\n            dataset_name (optional): Name of the dataset. If not provided,\n                there must be only one Dataset.\n\n        Returns:\n            A string handle to be fed to the :attr:`handle` placeholder.\n\n        Example:\n\n            .. code-block:: python\n\n                next_element = iterator.get_next()\n                train_handle = iterator.get_handle(sess, 'train')\n                # Gets the next training element\n                ne_ = sess.run(next_element,\n                               feed_dict={iterator.handle: train_handle})\n        \"\"\"\n        if dataset_name is None:\n            if self.num_datasets > 1:\n                raise ValueError(\"`dataset_name` is required if there are \"\n                                 \"more than one datasets.\")\n            dataset_name = next(iter(self._datasets))\n        if dataset_name not in self._datasets:\n            raise ValueError(\"Dataset not found: \", dataset_name)\n        return sess.run(self._dataset_iterators[dataset_name].string_handle())\n\n    def restart_dataset(self, sess, dataset_name=None):\n        \"\"\"Restarts datasets so that next iteration will fetch data from\n        the beginning of the datasets.\n\n        Args:\n            sess: The current tf session.\n            dataset_name (optional): A dataset name or a 
list of dataset names\n                that specifies which dataset(s) to restart. If `None`, all\n                datasets are restarted.\n        \"\"\"\n        self.initialize_dataset(sess, dataset_name)\n\n    def initialize_dataset(self, sess, dataset_name=None):\n        \"\"\"Initializes datasets. A dataset must be initialized before being\n        used.\n\n        Args:\n            sess: The current tf session.\n            dataset_name (optional): A dataset name or a list of dataset names\n                that specifies which dataset(s) to initialize. If `None`, all\n                datasets are initialized.\n        \"\"\"\n        if dataset_name is None:\n            dataset_name = self.dataset_names\n        if not isinstance(dataset_name, (tuple, list)):\n            dataset_name = [dataset_name]\n\n        for name in dataset_name:\n            sess.run(self._dataset_iterators[name].initializer)\n\n    def get_next(self):\n        \"\"\"Returns the next element of the activated dataset.\n        \"\"\"\n        return self._iterator.get_next()\n\n    @property\n    def handle(self):\n        \"\"\"The handle placeholder that can be fed with a dataset handle to\n        fetch data from the dataset.\n        \"\"\"\n        return self._handle\n\nclass TrainTestFeedableDataIterator(FeedableDataIterator):\n    \"\"\"Feedable data iterator that alternates between train, val, and test\n    datasets.\n\n    This is a wrapper of :class:`~texar.data.FeedableDataIterator`.\n    The iterator can switch to a dataset and resume from where it was\n    left off when it was visited last time.\n\n    :attr:`train`, :attr:`val`, and :attr:`test` can be instance of\n    either :tf_main:`tf.data.Dataset <data/Dataset>` or subclass of\n    :class:`~texar.data.DataBase`. 
At least one of them must be provided.\n\n    Args:\n        train (optional): Training data.\n        val (optional): Validation data.\n        test (optional): Test data.\n\n    Example:\n\n        .. code-block:: python\n\n            train_data = MonoTextData(hparams={'num_epochs': 200, ...})\n            test_data = MonoTextData(hparams_test)\n            iterator = TrainTestFeedableDataIterator(train=train_data,\n                                                     test=test_data)\n            batch = iterator.get_next()\n\n            sess = tf.Session()\n\n            def _eval_epoch(): # Iterate through test data for one epoch\n                # Initialize and start from beginning of test data\n                iterator.restart_test_dataset(sess)\n                while True:\n                    try:\n                        feed_dict = { # Read from test data\n                            iterator.handle: iterator.get_test_handle(sess)\n                        }\n                        test_batch_ = sess.run(batch, feed_dict=feed_dict)\n                    except tf.errors.OutOfRangeError:\n                        print(\"End of test epoch.\")\n                        break\n\n            # Initialize and start from beginning of training data\n            iterator.restart_train_dataset(sess)\n            step = 0\n            while True:\n                try:\n                    feed_dict = { # Read from training data\n                        iterator.handle: iterator.get_train_handle(sess)\n                    }\n                    train_batch_ = sess.run(batch, feed_dict=feed_dict)\n\n                    step +=1\n                    if step % 200 == 0: # Evaluate periodically\n                        _eval_epoch()\n                except tf.errors.OutOfRangeError:\n                    print(\"End of training.\")\n                    break\n    \"\"\"\n\n    def __init__(self, train=None, val=None, test=None):\n        dataset_dict = {}\n        self._train_name = 'train'\n        self._val_name = 
'val'\n        self._test_name = 'test'\n        if train is not None:\n            dataset_dict[self._train_name] = train\n        if val is not None:\n            dataset_dict[self._val_name] = val\n        if test is not None:\n            dataset_dict[self._test_name] = test\n        if len(dataset_dict) == 0:\n            raise ValueError(\"At least one of `train`, `val`, and `test` \"\n                             \"must be provided.\")\n\n        FeedableDataIterator.__init__(self, dataset_dict)\n\n    def get_train_handle(self, sess):\n        \"\"\"Returns the handle of the training dataset. The handle can be used\n        to feed the :attr:`handle` placeholder to fetch training data.\n\n        Args:\n            sess: The current tf session.\n\n        Returns:\n            A string handle to be fed to the :attr:`handle` placeholder.\n\n        Example:\n\n            .. code-block:: python\n\n                next_element = iterator.get_next()\n                train_handle = iterator.get_train_handle(sess)\n                # Gets the next training element\n                ne_ = sess.run(next_element,\n                               feed_dict={iterator.handle: train_handle})\n        \"\"\"\n        if self._train_name not in self._datasets:\n            raise ValueError(\"Training data not provided.\")\n        return self.get_handle(sess, self._train_name)\n\n    def get_val_handle(self, sess):\n        \"\"\"Returns the handle of the validation dataset. 
The handle can be used\n        to feed the :attr:`handle` placeholder to fetch validation data.\n\n        Args:\n            sess: The current tf session.\n\n        Returns:\n            A string handle to be fed to the :attr:`handle` placeholder.\n        \"\"\"\n        if self._val_name not in self._datasets:\n            raise ValueError(\"Val data not provided.\")\n        return self.get_handle(sess, self._val_name)\n\n    def get_test_handle(self, sess):\n        \"\"\"Returns the handle of the test dataset. The handle can be used\n        to feed the :attr:`handle` placeholder to fetch test data.\n\n        Args:\n            sess: The current tf session.\n\n        Returns:\n            A string handle to be fed to the :attr:`handle` placeholder.\n        \"\"\"\n        if self._test_name not in self._datasets:\n            raise ValueError(\"Test data not provided.\")\n        return self.get_handle(sess, self._test_name)\n\n    def restart_train_dataset(self, sess):\n        \"\"\"Restarts the training dataset so that next iteration will fetch\n        data from the beginning of the training dataset.\n\n        Args:\n            sess: The current tf session.\n        \"\"\"\n        if self._train_name not in self._datasets:\n            raise ValueError(\"Training data not provided.\")\n        self.restart_dataset(sess, self._train_name)\n\n    def restart_val_dataset(self, sess):\n        \"\"\"Restarts the validation dataset so that next iteration will fetch\n        data from the beginning of the validation dataset.\n\n        Args:\n            sess: The current tf session.\n        \"\"\"\n        if self._val_name not in self._datasets:\n            raise ValueError(\"Val data not provided.\")\n        self.restart_dataset(sess, self._val_name)\n\n    def restart_test_dataset(self, sess):\n        \"\"\"Restarts the test dataset so that next iteration will fetch\n        data from the beginning of the test dataset.\n\n        Args:\n         
   sess: The current tf session.\n        \"\"\"\n        if self._test_name not in self._datasets:\n            raise ValueError(\"Test data not provided.\")\n        self.restart_dataset(sess, self._test_name)\n"
  },
  {
    "path": "texar_repo/texar/data/data/data_iterators_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for data iterator related operations.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\n# pylint: disable=no-member, invalid-name\n\nimport tempfile\nimport numpy as np\n\nimport tensorflow as tf\n\nimport texar as tx\n\n\nclass DataIteratorTest(tf.test.TestCase):\n    \"\"\"Tests data iterators.\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n\n        # Create data\n        train_text = list(np.linspace(1, 1000, num=1000, dtype=np.int64))\n        train_text = [str(x) for x in train_text]\n        train_text_file = tempfile.NamedTemporaryFile()\n        train_text_file.write('\\n'.join(train_text).encode(\"utf-8\"))\n        train_text_file.flush()\n        self._train_text_file = train_text_file\n\n        test_text = list(np.linspace(1001, 2000, num=1000, dtype=np.int64))\n        test_text = [str(x) for x in test_text]\n        test_text_file = tempfile.NamedTemporaryFile()\n        test_text_file.write('\\n'.join(test_text).encode(\"utf-8\"))\n        test_text_file.flush()\n        self._test_text_file = test_text_file\n\n        vocab_list = train_text + test_text\n        vocab_file = tempfile.NamedTemporaryFile()\n        vocab_file.write('\\n'.join(vocab_list).encode(\"utf-8\"))\n        vocab_file.flush()\n        self._vocab_file = vocab_file\n        self._vocab_size = len(vocab_list)\n\n        self._train_hparams = {\n            \"num_epochs\": 2,\n            \"batch_size\": 1,\n            \"shuffle\": False,\n            \"dataset\": {\n                \"files\": self._train_text_file.name,\n                \"vocab_file\": self._vocab_file.name,\n                \"bos_token\": '',\n                \"eos_token\": ''\n            },\n            \"name\": \"train\"\n        }\n\n        self._test_hparams = {\n            \"num_epochs\": 1,\n    
        \"batch_size\": 1,\n            \"shuffle\": False,\n            \"dataset\": {\n                \"files\": self._test_text_file.name,\n                \"vocab_file\": self._vocab_file.name,\n                \"bos_token\": '',\n                \"eos_token\": ''\n            },\n            \"name\": \"test\"\n        }\n\n    def test_iterator_single_dataset(self):\n        \"\"\"Tests iterating over a single dataset.\n        \"\"\"\n        data = tx.data.MonoTextData(self._test_hparams)\n\n        iterator = tx.data.DataIterator(data)\n        data_batch = iterator.get_next()\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            sess.run(tf.local_variables_initializer())\n            sess.run(tf.tables_initializer())\n\n            for _ in range(2):\n                iterator.switch_to_dataset(sess)\n                i = 1001\n                while True:\n                    try:\n                        data_batch_ = sess.run(data_batch)\n                        self.assertEqual(\n                            tf.compat.as_text(data_batch_['text'][0][0]),\n                            str(i))\n                        i += 1\n                    except tf.errors.OutOfRangeError:\n                        print('Done -- epoch limit reached')\n                        self.assertEqual(i, 2001)\n                        break\n\n\n    def test_iterator_multi_datasets(self):\n        \"\"\"Tests iterating over multiple datasets.\n        \"\"\"\n        train_data = tx.data.MonoTextData(self._train_hparams)\n        test_data = tx.data.MonoTextData(self._test_hparams)\n\n        iterator = tx.data.DataIterator([train_data, test_data])\n        data_batch = iterator.get_next()\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            sess.run(tf.local_variables_initializer())\n            sess.run(tf.tables_initializer())\n\n            for _ in 
range(2):\n                # Iterates over train data\n                iterator.switch_to_dataset(sess, train_data.name)\n                i = 0\n                while True:\n                    try:\n                        data_batch_ = sess.run(data_batch)\n                        self.assertEqual(\n                            tf.compat.as_text(data_batch_['text'][0][0]),\n                            str(i+1))\n                        i = (i+1) % 1000\n                    except tf.errors.OutOfRangeError:\n                        print('Train data limit reached')\n                        self.assertEqual(i, 0)\n                        break\n\n                # Iterates over test data\n                iterator.switch_to_dataset(sess, test_data.name)\n                i = 1001\n                while True:\n                    try:\n                        data_batch_ = sess.run(data_batch)\n                        self.assertEqual(\n                            tf.compat.as_text(data_batch_['text'][0][0]),\n                            str(i))\n                        i += 1\n                    except tf.errors.OutOfRangeError:\n                        print('Test data limit reached')\n                        self.assertEqual(i, 2001)\n                        break\n\n    def test_train_test_data_iterator(self):\n        \"\"\"Tests :class:`texar.data.TrainTestDataIterator`\n        \"\"\"\n        train_data = tx.data.MonoTextData(self._train_hparams)\n        test_data = tx.data.MonoTextData(self._test_hparams)\n\n        iterator = tx.data.TrainTestDataIterator(train=train_data,\n                                                 test=test_data)\n        data_batch = iterator.get_next()\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            sess.run(tf.local_variables_initializer())\n            sess.run(tf.tables_initializer())\n\n            for _ in range(2):\n                
iterator.switch_to_train_data(sess)\n                i = 0\n                while True:\n                    try:\n                        data_batch_ = sess.run(data_batch)\n                        self.assertEqual(\n                            tf.compat.as_text(data_batch_['text'][0][0]),\n                            str(i+1))\n                        i = (i+1) % 1000\n                    except tf.errors.OutOfRangeError:\n                        print('Train data limit reached')\n                        self.assertEqual(i, 0)\n                        break\n\n                iterator.switch_to_test_data(sess)\n                i = 1001\n                while True:\n                    try:\n                        data_batch_ = sess.run(data_batch)\n                        self.assertEqual(\n                            tf.compat.as_text(data_batch_['text'][0][0]),\n                            str(i))\n                        i += 1\n                    except tf.errors.OutOfRangeError:\n                        print('Test data limit reached')\n                        self.assertEqual(i, 2001)\n                        break\n\n    def test_feedable_iterator_multi_datasets(self):\n        \"\"\"Tests iterating over multiple datasets with the\n        :class:`FeedableDataIterator`.\n        \"\"\"\n        train_data = tx.data.MonoTextData(self._train_hparams)\n        test_data = tx.data.MonoTextData(self._test_hparams)\n\n        iterator = tx.data.FeedableDataIterator([train_data, test_data])\n        data_batch = iterator.get_next()\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            sess.run(tf.local_variables_initializer())\n            sess.run(tf.tables_initializer())\n\n            iterator.initialize_dataset(sess)\n\n            for _ in range(2):\n                # Iterates over train data\n                iterator.restart_dataset(sess, train_data.name)\n                data_handle = 
iterator.get_handle(sess, train_data.name)\n                i = 0\n                while True:\n                    try:\n                        feed_dict = {iterator.handle: data_handle}\n                        data_batch_ = sess.run(data_batch, feed_dict=feed_dict)\n                        self.assertEqual(\n                            tf.compat.as_text(data_batch_['text'][0][0]),\n                            str(i+1))\n                        i = (i+1) % 1000\n                    except tf.errors.OutOfRangeError:\n                        print('Train data limit reached')\n                        self.assertEqual(i, 0)\n                        break\n\n                # Iterates over test data\n                iterator.restart_dataset(sess, test_data.name)\n                data_handle = iterator.get_handle(sess, test_data.name)\n                i = 1001\n                while True:\n                    try:\n                        feed_dict = {iterator.handle: data_handle}\n                        data_batch_ = sess.run(data_batch, feed_dict=feed_dict)\n                        self.assertEqual(\n                            tf.compat.as_text(data_batch_['text'][0][0]),\n                            str(i))\n                        i += 1\n                    except tf.errors.OutOfRangeError:\n                        print('Test data limit reached')\n                        self.assertEqual(i, 2001)\n                        break\n\n    def test_train_test_feedable_data_iterator(self):\n        \"\"\"Tests :class:`texar.data.TrainTestFeedableDataIterator`\n        \"\"\"\n        train_data = tx.data.MonoTextData(self._train_hparams)\n        test_data = tx.data.MonoTextData(self._test_hparams)\n\n        iterator = tx.data.TrainTestFeedableDataIterator(train=train_data,\n                                                         test=test_data)\n        data_batch = iterator.get_next()\n\n        with self.test_session() as sess:\n            
sess.run(tf.global_variables_initializer())\n            sess.run(tf.local_variables_initializer())\n            sess.run(tf.tables_initializer())\n\n            for _ in range(2):\n                iterator.restart_train_dataset(sess)\n                i = 0\n                while True:\n                    try:\n                        feed_dict = {\n                            iterator.handle: iterator.get_train_handle(sess)\n                        }\n                        data_batch_ = sess.run(data_batch, feed_dict=feed_dict)\n                        self.assertEqual(\n                            tf.compat.as_text(data_batch_['text'][0][0]),\n                            str(i+1))\n                        i = (i+1) % 1000\n                    except tf.errors.OutOfRangeError:\n                        print('Train data limit reached')\n                        self.assertEqual(i, 0)\n                        break\n\n                iterator.restart_test_dataset(sess)\n                i = 1001\n                while True:\n                    try:\n                        feed_dict = {\n                            iterator.handle: iterator.get_test_handle(sess)\n                        }\n                        data_batch_ = sess.run(data_batch, feed_dict=feed_dict)\n                        self.assertEqual(\n                            tf.compat.as_text(data_batch_['text'][0][0]),\n                            str(i))\n                        i += 1\n                    except tf.errors.OutOfRangeError:\n                        print('Test data limit reached')\n                        self.assertEqual(i, 2001)\n                        break\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/data/data/dataset_utils.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious utilities specific to dataset processing.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport six\n\nimport tensorflow as tf\n\nimport numpy as np\n\nfrom texar.utils import utils\n\n# pylint: disable=invalid-name, too-many-arguments\n\n__all__ = [\n    \"_DataSpec\",\n    \"_connect_name\",\n    \"maybe_tuple\",\n    \"make_partial\",\n    \"make_chained_transformation\",\n    \"make_combined_transformation\",\n    \"random_shard_dataset\",\n]\n\nclass _DataSpec(object):\n    \"\"\"Dataset specification. 
Used to pass necessary info to\n    user-defined transformation functions.\n\n    Args:\n        dataset: Instance of :tf_main:`tf.data.Dataset <data/Dataset>`.\n        dataset_size (int): Number of data samples.\n        decoder: A (list of) data decoder.\n        vocab: A (list of) :class:`~texar.data.Vocab` instance.\n        embedding: A (list of) :class:`~texar.data.Embedding` instance.\n        **kwargs: Any remaining dataset-specific fields.\n    \"\"\"\n    def __init__(self, dataset=None, dataset_size=None, decoder=None,\n                 vocab=None, embedding=None, **kwargs):\n        kwargs['dataset'] = dataset\n        kwargs['dataset_size'] = dataset_size\n        kwargs['decoder'] = decoder\n        kwargs['vocab'] = vocab\n        kwargs['embedding'] = embedding\n        self.__dict__.update(kwargs)\n\n    def add_spec(self, **kwargs):\n        \"\"\"Adds new field(s).\n        \"\"\"\n        self.__dict__.update(kwargs)\n\n    def get_ith_data_spec(self, i):\n        \"\"\"Returns an instance of :class:`_DataSpec` that contains the\n        `i`-th specifications.\n        \"\"\"\n        kwargs = {}\n        for k, v in six.iteritems(self.__dict__):\n            kwargs[k] = v[i] if isinstance(v, (tuple, list)) else v\n        return _DataSpec(**kwargs)\n\n    def set_ith_data_spec(self, i, data_spec, total_count):\n        \"\"\"Sets the `i`-th specification to respective values in\n        :attr:`data_spec`.\n        \"\"\"\n        for k, v in six.iteritems(data_spec.__dict__):\n            if k in self.__dict__:\n                v_ = self.__dict__[k]\n                if isinstance(v_, (tuple, list)):\n                    v_[i] = v\n                else:\n                    new_v_ = [v_] * total_count\n                    new_v_[i] = v\n                    self.__dict__[k] = new_v_\n            else:\n                v_ = [None] * total_count\n                v_[i] = v\n                self.__dict__[k] = v_\n\ndef 
_make_length_filter_fn(length_name, max_length):\n    \"\"\"Returns a predicate function which takes in data sample\n    and returns a bool indicating whether to filter by length.\n    \"\"\"\n    def _filter_fn(data):\n        return data[length_name] <= max_length\n    return _filter_fn\n\ndef _make_smaller_batch_filter_fn(batch_size):\n    \"\"\"Returns a predicate function which takes in a batched data\n    and returns a bool indicating whether the batch is of :attr:`batch_size`.\n    \"\"\"\n    def _filter_fn(data):\n        if isinstance(data, (list, tuple)):\n            return _filter_fn(data[0])\n        elif isinstance(data, dict):\n            return _filter_fn(data[next(iter(data))])\n        else:\n            return tf.equal(tf.shape(data)[0], batch_size)\n\n    return _filter_fn\n\ndef _make_combined_filter_fn(filter_fns, mode=\"and\"):\n    \"\"\"Returns a new predicate function that combines multiple\n    predicate functions with certain mode.\n\n    Returns `None` if all elements in :attr:`filter_fns` are `None`.\n\n    Args:\n        filter_fns (list): Filter functions to combine. 
`None` functions are\n            ignored.\n        mode (str): A mode from `{\"and\", \"or\"}`.\n    \"\"\"\n    if not any(filter_fns):\n        return None\n\n    def _combined_fn(data):\n        outputs = []\n        for fn in filter_fns:\n            if fn:\n                outputs.append(fn(data))\n        if mode == \"and\":\n            return tf.reduce_all(outputs)\n        elif mode == \"or\":\n            return tf.reduce_any(outputs)\n        else:\n            raise ValueError(\"Unknown mode: {}\".format(mode))\n    return _combined_fn\n\ndef _connect_name(lhs_name, rhs_name):\n    if not lhs_name:\n        return rhs_name\n    if not rhs_name:\n        return lhs_name\n    return \"{}_{}\".format(lhs_name, rhs_name)\n\ndef maybe_tuple(data):\n    \"\"\"Returns `tuple(data)` if :attr:`data` contains more than 1 element.\n\n    Used to wrap `map_func` inputs.\n    \"\"\"\n    data = tuple(data)\n    data = data if len(data) > 1 else data[0]\n    return data\n\ndef make_partial(fn, *args, **kwargs):\n    \"\"\"Returns a new function with single argument by freezing other arguments\n    of :attr:`fn`.\n    \"\"\"\n    def _new_fn(data):\n        return fn(data, *args, **kwargs)\n    return _new_fn\n\ndef name_prefix_fn(name_prefix):\n    \"\"\"Returns a function that appends a prefix to field names.\n    \"\"\"\n    def _prefix_fn(data):\n        transformed_data = {}\n        for name, value in six.iteritems(data):\n            new_name = _connect_name(name_prefix, name)\n            transformed_data[new_name] = value\n        return transformed_data\n\n    return _prefix_fn\n\ndef make_chained_transformation(tran_fns, *args, **kwargs):\n    \"\"\"Returns a dataset transformation function that applies a list of\n    transformations sequentially.\n\n    Args:\n        tran_fns (list): A list of dataset transformation functions.\n        *args: Extra arguments for each of the transformation function.\n        **kwargs: Extra keyword arguments for each of 
the transformation\n            function.\n\n    Returns:\n        A transformation function to be used in\n        :tf_main:`tf.data.Dataset.map <data/Dataset#map>`.\n    \"\"\"\n    def _chained_fn(data):\n        for tran_fns_i in tran_fns:\n            data = tran_fns_i(data, *args, **kwargs)\n        return data\n\n    return _chained_fn\n\ndef make_combined_transformation(tran_fns, name_prefix=None, *args, **kwargs):\n    \"\"\"Returns a dataset transformation function that applies\n    transformations to each component of the data.\n\n    The data to be transformed must be a tuple of the same length\n    of :attr:`tran_fns`.\n\n    Args:\n        tran_fns (list): A list of elements where each element is a\n            transformation function or a list of transformation functions.\n        name_prefix (list, optional): Prefix to the field names of each\n            component of the data, to prevent fields with the same name\n            in different components from overriding each other. 
If not `None`,\n            must be of the same length of :attr:`tran_fns`.\n        *args: Extra arguments for each of the transformation function.\n        **kwargs: Extra keyword arguments for each of the transformation\n            function.\n\n    Returns:\n        A transformation function to be used in\n        :tf_main:`tf.data.Dataset.map <data/Dataset#map>`.\n    \"\"\"\n    if name_prefix and len(name_prefix) != len(tran_fns):\n        raise ValueError(\"`name_prefix`, if provided, must be of the same \"\n                         \"length of `tran_fns`.\")\n\n    def _combined_fn(data):\n        transformed_data = {}\n        for i, tran_fns_i in enumerate(tran_fns):\n            data_i = data[i]\n            # Process data_i\n            if not isinstance(tran_fns_i, (list, tuple)):\n                tran_fns_i = [tran_fns_i]\n            for tran_fns_ij in tran_fns_i:\n                data_i = tran_fns_ij(data_i, *args, **kwargs)\n            # Add to dict by appending name prefix\n            for name, value in six.iteritems(data_i):\n                new_name = name\n                if name_prefix:\n                    new_name = _connect_name(name_prefix[i], name)\n                if new_name in transformed_data:\n                    raise ValueError(\n                        \"Field name already exists: {}\".format(new_name))\n                transformed_data[new_name] = value\n        return transformed_data\n\n    return _combined_fn\n\ndef random_shard_dataset(dataset_size, shard_size, seed=None):\n    \"\"\"Returns a dataset transformation function that randomly shards a\n    dataset.\n    \"\"\"\n    num_shards = utils.ceildiv(dataset_size, shard_size)\n    boundaries = np.linspace(0, dataset_size, num=num_shards, endpoint=False,\n                             dtype=np.int64) #pylint: disable=no-member\n\n    def _shard_fn(dataset):\n        sharded_dataset = (\n            tf.data.Dataset.from_tensor_slices(boundaries)\n            
.shuffle(num_shards, seed=seed)\n            .flat_map(lambda lb: dataset.skip(lb).take(shard_size)))\n        return sharded_dataset\n\n    return _shard_fn\n\n"
  },
  {
    "path": "texar_repo/texar/data/data/dataset_utils_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for data utils.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.data.data import dataset_utils as dsutils\n\n\n# pylint: disable=invalid-name\n\nclass TransformationTest(tf.test.TestCase):\n    \"\"\"Tests various transformation utilities.\n    \"\"\"\n\n    def test_make_chained_transformation(self):\n        \"\"\"Tests :func:`texar.data.make_chained_transformation`\n        \"\"\"\n        original_data = np.arange(0, 10)\n        dataset = tf.data.Dataset.from_tensor_slices(original_data)\n\n        def _tran_a(data):\n            return data + 100\n        def _tran_b(data):\n            return data + 1000\n        def _tran_c(data):\n            return data + 10000\n\n        chained_tran = dsutils.make_chained_transformation(\n            [_tran_a, _tran_b, _tran_c])\n        dataset = dataset.map(chained_tran)\n\n        iterator = dataset.make_one_shot_iterator()\n        elem = iterator.get_next()\n        with self.test_session() as sess:\n            data_ = []\n            while True:\n                try:\n                    data_.append(sess.run(elem))\n                except tf.errors.OutOfRangeError:\n                    break\n            self.assertEqual(len(data_), len(original_data))\n            data_ = [elem_ - 11100 for elem_ in data_]\n            self.assertEqual(data_, original_data.tolist())\n\nif __name__ == \"__main__\":\n    tf.test.main()\n\n"
  },
  {
    "path": "texar_repo/texar/data/data/mono_text_data.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nMono text data class that define data reading, parsing, batching, and other\npreprocessing operations.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nfrom texar.utils import utils\nfrom texar.utils.dtypes import is_callable\nfrom texar.data.data_utils import count_file_lines\nfrom texar.data.data import dataset_utils as dsutils\nfrom texar.data.data.text_data_base import TextDataBase\nfrom texar.data.data_decoders import TextDataDecoder, VarUttTextDataDecoder\nfrom texar.data.vocabulary import Vocab, SpecialTokens\nfrom texar.data.embedding import Embedding\n\n# pylint: disable=invalid-name, arguments-differ, protected-access, no-member\n\n__all__ = [\n    \"_default_mono_text_dataset_hparams\",\n    \"MonoTextData\"\n]\n\nclass _LengthFilterMode(object): # pylint: disable=no-init, too-few-public-methods\n    \"\"\"Options of length filter mode.\n    \"\"\"\n    TRUNC = \"truncate\"\n    DISCARD = \"discard\"\n\ndef _default_mono_text_dataset_hparams():\n    \"\"\"Returns hyperparameters of a mono text dataset with default values.\n\n    See :meth:`texar.MonoTextData.default_hparams` for details.\n    \"\"\"\n    return {\n        \"files\": [],\n        \"compression_type\": None,\n       
 \"vocab_file\": \"\",\n        \"embedding_init\": Embedding.default_hparams(),\n        \"delimiter\": \" \",\n        \"max_seq_length\": None,\n        \"length_filter_mode\": \"truncate\",\n        \"pad_to_max_seq_length\": False,\n        \"bos_token\": SpecialTokens.BOS,\n        \"eos_token\": SpecialTokens.EOS,\n        \"other_transformations\": [],\n        \"variable_utterance\": False,\n        \"utterance_delimiter\": \"|||\",\n        \"max_utterance_cnt\": 5,\n        \"data_name\": None,\n        \"@no_typecheck\": [\"files\"]\n    }\n\nclass MonoTextData(TextDataBase):\n    \"\"\"Text data processor that reads single set of text files. This can be\n    used for, e.g., language models, auto-encoders, etc.\n\n    Args:\n        hparams: A `dict` or instance of :class:`~texar.HParams` containing\n            hyperparameters. See :meth:`default_hparams` for the defaults.\n\n    By default, the processor reads raw data files, performs tokenization,\n    batching and other pre-processing steps, and results in a TF Dataset\n    whose element is a python `dict` including three fields:\n\n        - \"text\":\n            A string Tensor of shape `[batch_size, max_time]` containing\n            the **raw** text toknes. `max_time` is the length of the longest\n            sequence in the batch.\n            Short sequences in the batch are padded with **empty string**.\n            BOS and EOS tokens are added as per\n            :attr:`hparams`. 
Out-of-vocabulary tokens are **NOT** replaced\n            with UNK.\n        - \"text_ids\":\n            An `int64` Tensor of shape `[batch_size, max_time]`\n            containing the token indexes.\n        - \"length\":\n            An `int` Tensor of shape `[batch_size]` containing the\n            length of each sequence in the batch (including BOS and\n            EOS if added).\n\n    If :attr:`'variable_utterance'` is set to `True` in :attr:`hparams`, the\n    resulting dataset has elements with four fields:\n\n        - \"text\":\n            A string Tensor of shape\n            `[batch_size, max_utterance, max_time]`, where *max_utterance* is\n            either the maximum number of utterances in each elements of the\n            batch, or :attr:`max_utterance_cnt` as specified in :attr:`hparams`.\n        - \"text_ids\":\n            An `int64` Tensor of shape\n            `[batch_size, max_utterance, max_time]` containing the token\n            indexes.\n        - \"length\":\n            An `int` Tensor of shape `[batch_size, max_utterance]`\n            containing the length of each sequence in the batch.\n        - \"utterance_cnt\":\n            An `int` Tensor of shape `[batch_size]` containing\n            the number of utterances of each element in the batch.\n\n    The above field names can be accessed through :attr:`text_name`,\n    :attr:`text_id_name`, :attr:`length_name`, and\n    :attr:`utterance_cnt_name`, respectively.\n\n    Example:\n\n        .. 
code-block:: python\n\n            hparams={\n                'dataset': { 'files': 'data.txt', 'vocab_file': 'vocab.txt' },\n                'batch_size': 1\n            }\n            data = MonoTextData(hparams)\n            iterator = DataIterator(data)\n            batch = iterator.get_next()\n\n            iterator.switch_to_dataset(sess) # initializes the dataset\n            batch_ = sess.run(batch)\n            # batch_ == {\n            #    'text': [['<BOS>', 'example', 'sequence', '<EOS>']],\n            #    'text_ids': [[1, 5, 10, 2]],\n            #    'length': [4]\n            # }\n    \"\"\"\n\n    def __init__(self, hparams):\n        TextDataBase.__init__(self, hparams)\n        with tf.name_scope(self.name, self.default_hparams()[\"name\"]):\n            self._make_data()\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dicitionary of default hyperparameters:\n\n        .. code-block:: python\n\n            {\n                # (1) Hyperparams specific to text dataset\n                \"dataset\": {\n                    \"files\": [],\n                    \"compression_type\": None,\n                    \"vocab_file\": \"\",\n                    \"embedding_init\": {},\n                    \"delimiter\": \" \",\n                    \"max_seq_length\": None,\n                    \"length_filter_mode\": \"truncate\",\n                    \"pad_to_max_seq_length\": False,\n                    \"bos_token\": \"<BOS>\"\n                    \"eos_token\": \"<EOS>\"\n                    \"other_transformations\": [],\n                    \"variable_utterance\": False,\n                    \"utterance_delimiter\": \"|||\",\n                    \"max_utterance_cnt\": 5,\n                    \"data_name\": None,\n                }\n                # (2) General hyperparams\n                \"num_epochs\": 1,\n                \"batch_size\": 64,\n                \"allow_smaller_final_batch\": True,\n                \"shuffle\": 
True,\n                \"shuffle_buffer_size\": None,\n                \"shard_and_shuffle\": False,\n                \"num_parallel_calls\": 1,\n                \"prefetch_buffer_size\": 0,\n                \"max_dataset_size\": -1,\n                \"seed\": None,\n                \"name\": \"mono_text_data\",\n                # (3) Bucketing\n                \"bucket_boundaries\": [],\n                \"bucket_batch_sizes\": None,\n                \"bucket_length_fn\": None,\n            }\n\n        Here:\n\n        1. For the hyperparameters in the :attr:`\"dataset\"` field:\n\n            \"files\" : str or list\n                A (list of) text file path(s).\n\n                Each line contains a single text sequence.\n\n            \"compression_type\" : str, optional\n                One of \"\" (no compression), \"ZLIB\", or \"GZIP\".\n\n            \"vocab_file\": str\n                Path to vocabulary file. Each line of the file should contain\n                one vocabulary token.\n\n                Used to create an instance of :class:`~texar.data.Vocab`.\n\n            \"embedding_init\" : dict\n                The hyperparameters for pre-trained embedding loading and\n                initialization.\n\n                The structure and default values are defined in\n                :meth:`texar.data.Embedding.default_hparams`.\n\n            \"delimiter\" : str\n                The delimiter to split each line of the text files into tokens.\n\n            \"max_seq_length\" : int, optional\n                Maximum length of output sequences. Data samples exceeding the\n                length will be truncated or discarded according to\n                :attr:`\"length_filter_mode\"`. The length does not include\n                any added\n                :attr:`\"bos_token\"` or :attr:`\"eos_token\"`. 
If `None` (default),\n                no filtering is performed.\n\n            \"length_filter_mode\" : str\n                Either \"truncate\" or \"discard\". If \"truncate\" (default),\n                tokens exceeding the :attr:`\"max_seq_length\"` will be truncated.\n                If \"discard\", data samples longer than the\n                :attr:`\"max_seq_length\"`\n                will be discarded.\n\n            \"pad_to_max_seq_length\" : bool\n                If `True`, pad all data instances to length\n                :attr:`\"max_seq_length\"`.\n                Raises error if :attr:`\"max_seq_length\"` is not provided.\n\n            \"bos_token\" : str\n                The Begin-Of-Sequence token prepended to each sequence.\n\n                Set to an empty string to avoid prepending.\n\n            \"eos_token\" : str\n                The End-Of-Sequence token appended to each sequence.\n\n                Set to an empty string to avoid appending.\n\n            \"other_transformations\" : list\n                A list of transformation functions or function names/paths to\n                further transform each single data instance.\n\n                (More documentations to be added.)\n\n            \"variable_utterance\" : bool\n                If `True`, each line of the text file is considered to contain\n                multiple sequences (utterances) separated by\n                :attr:`\"utterance_delimiter\"`.\n\n                For example, in dialog data, each line can contain a series of\n                dialog history utterances. See the example in\n                `examples/hierarchical_dialog` for a use case.\n\n            \"utterance_delimiter\" : str\n                The delimiter to split over utterance level. Should not be the\n                same with :attr:`\"delimiter\"`. 
Used only when\n                :attr:`\"variable_utterance\"``==True`.\n\n            \"max_utterance_cnt\" : int\n                Maximally allowed number of utterances in a data instance.\n                Extra utterances are truncated out.\n\n            \"data_name\" : str\n                Name of the dataset.\n\n        2. For the **general** hyperparameters, see\n        :meth:`texar.data.DataBase.default_hparams` for details.\n\n        3. **Bucketing** is to group elements of the dataset together by length\n        and then pad and batch. (See more at\n        :tf_main:`bucket_by_sequence_length\n        <contrib/data/bucket_by_sequence_length>`). For bucketing\n        hyperparameters:\n\n            \"bucket_boundaries\" : list\n                An int list containing the upper length boundaries of the\n                buckets.\n\n                Set to an empty list (default) to disable bucketing.\n\n            \"bucket_batch_sizes\" : list\n                An int list containing batch size per bucket. Length should be\n                `len(bucket_boundaries) + 1`.\n\n                If `None`, every bucket whill have the same batch size specified\n                in :attr:`batch_size`.\n\n            \"bucket_length_fn\" : str or callable\n                Function maps dataset element to `tf.int32` scalar, determines\n                the length of the element.\n\n                This can be a function, or the name or full module path to the\n                function. 
If function name is given, the function must be in the\n                :mod:`texar.custom` module.\n\n                If `None` (default), length is determined by the number of\n                tokens (including BOS and EOS if added) of the element.\n\n        \"\"\"\n        hparams = TextDataBase.default_hparams()\n        hparams[\"name\"] = \"mono_text_data\"\n        hparams.update({\n            \"dataset\": _default_mono_text_dataset_hparams()\n        })\n        return hparams\n\n    @staticmethod\n    def make_vocab(hparams):\n        \"\"\"Reads vocab file and returns an instance of\n        :class:`texar.data.Vocab`.\n        \"\"\"\n        bos_token = utils.default_str(\n            hparams[\"bos_token\"], SpecialTokens.BOS)\n        eos_token = utils.default_str(\n            hparams[\"eos_token\"], SpecialTokens.EOS)\n        vocab = Vocab(hparams[\"vocab_file\"],\n                      bos_token=bos_token, eos_token=eos_token)\n        return vocab\n\n    @staticmethod\n    def make_embedding(emb_hparams, token_to_id_map):\n        \"\"\"Optionally loads embedding from file (if provided), and returns\n        an instance of :class:`texar.data.Embedding`.\n        \"\"\"\n        embedding = None\n        if emb_hparams[\"file\"] is not None and len(emb_hparams[\"file\"]) > 0:\n            embedding = Embedding(token_to_id_map, emb_hparams)\n        return embedding\n\n    @staticmethod\n    def _make_mono_text_dataset(dataset_hparams):\n        dataset = tf.data.TextLineDataset(\n            dataset_hparams[\"files\"],\n            compression_type=dataset_hparams[\"compression_type\"])\n        return dataset\n\n    @staticmethod\n    def _make_other_transformations(other_trans_hparams, data_spec):\n        \"\"\"Creates a list of tranformation functions based on the\n        hyperparameters.\n\n        Args:\n            other_trans_hparams (list): A list of transformation functions,\n                names, or full paths.\n            data_spec: 
An instance of :class:`texar.data._DataSpec` to\n                be passed to transformation functions.\n\n        Returns:\n            A list of transformation functions.\n        \"\"\"\n        other_trans = []\n        for tran in other_trans_hparams:\n            if not is_callable(tran):\n                tran = utils.get_function(tran, [\"texar.custom\"])\n            other_trans.append(dsutils.make_partial(tran, data_spec))\n        return other_trans\n\n    @staticmethod\n    def _make_processor(dataset_hparams, data_spec, chained=True,\n                        name_prefix=None):\n        # Create data decoder\n        max_seq_length = None\n        if dataset_hparams[\"length_filter_mode\"] == \"truncate\":\n            max_seq_length = dataset_hparams[\"max_seq_length\"]\n\n        if not dataset_hparams[\"variable_utterance\"]:\n            decoder = TextDataDecoder(\n                delimiter=dataset_hparams[\"delimiter\"],\n                bos_token=dataset_hparams[\"bos_token\"],\n                eos_token=dataset_hparams[\"eos_token\"],\n                max_seq_length=max_seq_length,\n                token_to_id_map=data_spec.vocab.token_to_id_map)\n        else:\n            decoder = VarUttTextDataDecoder( # pylint: disable=redefined-variable-type\n                sentence_delimiter=dataset_hparams[\"utterance_delimiter\"],\n                delimiter=dataset_hparams[\"delimiter\"],\n                bos_token=dataset_hparams[\"bos_token\"],\n                eos_token=dataset_hparams[\"eos_token\"],\n                max_seq_length=max_seq_length,\n                max_utterance_cnt=dataset_hparams[\"max_utterance_cnt\"],\n                token_to_id_map=data_spec.vocab.token_to_id_map)\n\n        # Create other transformations\n        data_spec.add_spec(decoder=decoder)\n        other_trans = MonoTextData._make_other_transformations(\n            dataset_hparams[\"other_transformations\"], data_spec)\n        if name_prefix:\n            
other_trans.append(dsutils.name_prefix_fn(name_prefix))\n\n        data_spec.add_spec(name_prefix=name_prefix)\n\n        if chained:\n            chained_tran = dsutils.make_chained_transformation(\n                [decoder] + other_trans)\n            return chained_tran, data_spec\n        else:\n            return decoder, other_trans, data_spec\n\n    @staticmethod\n    def _make_length_filter(dataset_hparams, length_name, decoder):\n        filter_mode = dataset_hparams[\"length_filter_mode\"]\n        max_length = dataset_hparams[\"max_seq_length\"]\n        filter_fn = None\n        if filter_mode == _LengthFilterMode.DISCARD and max_length is not None:\n            max_length += decoder.added_length\n            filter_fn = dsutils._make_length_filter_fn(length_name,\n                                                       max_length)\n        return filter_fn\n\n    def _process_dataset(self, dataset, hparams, data_spec):\n        chained_tran, data_spec = self._make_processor(\n            hparams[\"dataset\"], data_spec,\n            name_prefix=hparams[\"dataset\"][\"data_name\"])\n        num_parallel_calls = hparams[\"num_parallel_calls\"]\n        dataset = dataset.map(\n            lambda *args: chained_tran(dsutils.maybe_tuple(args)),\n            num_parallel_calls=num_parallel_calls)\n\n        # Filters by length\n        length_name = dsutils._connect_name(\n            data_spec.name_prefix,\n            data_spec.decoder.length_tensor_name)\n        filter_fn = self._make_length_filter(\n            hparams[\"dataset\"], length_name, data_spec.decoder)\n        if filter_fn:\n            dataset = dataset.filter(filter_fn)\n\n        # Truncates data count\n        dataset = dataset.take(hparams[\"max_dataset_size\"])\n\n        return dataset, data_spec\n\n    def _make_bucket_length_fn(self):\n        length_fn = self._hparams.bucket_length_fn\n        if not length_fn:\n            length_fn = lambda x: x[self.length_name]\n        elif 
not is_callable(length_fn):\n            # pylint: disable=redefined-variable-type\n            length_fn = utils.get_function(length_fn, [\"texar.custom\"])\n        return length_fn\n\n    @staticmethod\n    def _make_padded_text_and_id_shapes(dataset, dataset_hparams, decoder,\n                                        text_name, text_id_name):\n        max_length = dataset_hparams['max_seq_length']\n        if max_length is None:\n            raise ValueError(\"hparams 'max_seq_length' must be specified \"\n                             \"when 'pad_to_max_seq_length' is True.\")\n        max_length += decoder.added_length\n\n        padded_shapes = dataset.output_shapes\n\n        def _get_new_shape(name):\n            dim = len(padded_shapes[name])\n            if not dataset_hparams['variable_utterance']:\n                if dim != 1:\n                    raise ValueError(\n                        \"Unable to pad data '%s' to max seq length. Expected \"\n                        \"1D Tensor, but got %dD Tensor.\" % (name, dim))\n                return tf.TensorShape(max_length)\n            else:\n                if dim != 2:\n                    raise ValueError(\n                        \"Unable to pad data '%s' to max seq length. 
Expected \"\n                        \"2D Tensor, but got %dD Tensor.\" % (name, dim))\n                return tf.TensorShape([padded_shapes[name][0], max_length])\n\n        text_and_id_shapes = {}\n        if text_name in padded_shapes:\n            text_and_id_shapes[text_name] = _get_new_shape(text_name)\n        if text_id_name in padded_shapes:\n            text_and_id_shapes[text_id_name] = _get_new_shape(text_id_name)\n\n        return text_and_id_shapes\n\n    def _make_padded_shapes(self, dataset, decoder):\n        if not self._hparams.dataset.pad_to_max_seq_length:\n            return None\n\n        text_and_id_shapes = MonoTextData._make_padded_text_and_id_shapes(\n            dataset, self._hparams.dataset, decoder,\n            self.text_name, self.text_id_name)\n\n        padded_shapes = dataset.output_shapes\n        padded_shapes.update(text_and_id_shapes)\n\n        return padded_shapes\n\n    def _make_data(self):\n        dataset_hparams = self._hparams.dataset\n\n        # Create vocab and embedding\n        self._vocab = self.make_vocab(dataset_hparams)\n        self._embedding = self.make_embedding(\n            dataset_hparams[\"embedding_init\"], self._vocab.token_to_id_map_py)\n\n        # Create and shuffle dataset\n        dataset = self._make_mono_text_dataset(dataset_hparams)\n        dataset, dataset_size = self._shuffle_dataset(\n            dataset, self._hparams, self._hparams.dataset.files)\n        self._dataset_size = dataset_size\n\n        # Processing\n        data_spec = dsutils._DataSpec(dataset=dataset,\n                                      dataset_size=self._dataset_size,\n                                      vocab=self._vocab,\n                                      embedding=self._embedding)\n        dataset, data_spec = self._process_dataset(dataset, self._hparams,\n                                                   data_spec)\n        self._data_spec = data_spec\n        self._decoder = data_spec.decoder\n\n        
# Batching\n        length_fn = self._make_bucket_length_fn()\n        padded_shapes = self._make_padded_shapes(dataset, self._decoder)\n        dataset = self._make_batch(\n            dataset, self._hparams, length_fn, padded_shapes)\n\n        # Prefetching\n        if self._hparams.prefetch_buffer_size > 0:\n            dataset = dataset.prefetch(self._hparams.prefetch_buffer_size)\n\n        self._dataset = dataset\n\n    def list_items(self):\n        \"\"\"Returns the list of item names that the data can produce.\n\n        Returns:\n            A list of strings.\n        \"\"\"\n        return list(self._dataset.output_types.keys())\n\n    @property\n    def dataset(self):\n        \"\"\"The dataset, an instance of\n        :tf_main:`TF dataset <data/TextLineDataset>`.\n        \"\"\"\n        return self._dataset\n\n    def dataset_size(self):\n        \"\"\"Returns the number of data instances in the data files.\n\n        Note that this is the total data count in the raw files, before any\n        filtering and truncation.\n        \"\"\"\n        if not self._dataset_size:\n            # pylint: disable=attribute-defined-outside-init\n            self._dataset_size = count_file_lines(\n                self._hparams.dataset.files)\n        return self._dataset_size\n\n    @property\n    def vocab(self):\n        \"\"\"The vocabulary, an instance of :class:`~texar.data.Vocab`.\n        \"\"\"\n        return self._vocab\n\n    @property\n    def embedding_init_value(self):\n        \"\"\"The `Tensor` containing the embedding value loaded from file.\n        `None` if embedding is not specified.\n        \"\"\"\n        if self._embedding is None:\n            return None\n        return self._embedding.word_vecs\n\n    @property\n    def text_name(self):\n        \"\"\"The name of text tensor, \"text\" by default.\n        \"\"\"\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix,\n            
self._data_spec.decoder.text_tensor_name)\n        return name\n\n    @property\n    def length_name(self):\n        \"\"\"The name of length tensor, \"length\" by default.\n        \"\"\"\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix,\n            self._data_spec.decoder.length_tensor_name)\n        return name\n\n    @property\n    def text_id_name(self):\n        \"\"\"The name of text index tensor, \"text_ids\" by default.\n        \"\"\"\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix,\n            self._data_spec.decoder.text_id_tensor_name)\n        return name\n\n    @property\n    def utterance_cnt_name(self):\n        \"\"\"The name of utterance count tensor, \"utterance_cnt\" by default.\n        \"\"\"\n        if not self._hparams.dataset.variable_utterance:\n            raise ValueError(\"`utterance_cnt_name` is not defined.\")\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix,\n            self._data_spec.decoder.utterance_cnt_tensor_name)\n        return name\n\n"
  },
  {
    "path": "texar_repo/texar/data/data/mono_text_data_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for data related operations.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tempfile\nimport copy\nimport numpy as np\n\nimport tensorflow as tf\n\nimport texar as tx\n\n# pylint: disable=too-many-locals, protected-access, too-many-branches\n# pylint: disable=invalid-name\n\nclass MonoTextDataTest(tf.test.TestCase):\n    \"\"\"Tests text data class.\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n\n        # Create test data\n        vocab_list = ['word', '词']\n        vocab_file = tempfile.NamedTemporaryFile()\n        vocab_file.write('\\n'.join(vocab_list).encode(\"utf-8\"))\n        vocab_file.flush()\n        self._vocab_file = vocab_file\n        self._vocab_size = len(vocab_list)\n\n        text = ['This is a test sentence .', '词 词 。']\n        text_file = tempfile.NamedTemporaryFile()\n        text_file.write('\\n'.join(text).encode(\"utf-8\"))\n        text_file.flush()\n        self._text_file = text_file\n\n        self._hparams = {\n            \"num_epochs\": 50,\n            \"batch_size\": 3,\n            \"dataset\": {\n                \"files\": self._text_file.name,\n                \"vocab_file\": self._vocab_file.name,\n            }\n        }\n\n    def _run_and_test(self,\n                      hparams,\n                      test_batch_size=False,\n                      length_inc=None):\n        # Construct database\n        text_data = tx.data.MonoTextData(hparams)\n        self.assertEqual(text_data.vocab.size,\n                         self._vocab_size + len(text_data.vocab.special_tokens))\n\n        iterator = text_data.dataset.make_initializable_iterator()\n        text_data_batch = iterator.get_next()\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            
sess.run(tf.local_variables_initializer())\n            sess.run(tf.tables_initializer())\n            sess.run(iterator.initializer)\n\n            while True:\n                try:\n                    data_batch_ = sess.run(text_data_batch)\n\n                    self.assertEqual(set(data_batch_.keys()),\n                                     set(text_data.list_items()))\n\n                    if test_batch_size:\n                        self.assertEqual(len(data_batch_['text']),\n                                         hparams['batch_size'])\n\n                    if length_inc:\n                        for i in range(len(data_batch_['text'])):\n                            text_ = data_batch_['text'][i].tolist()\n                            self.assertEqual(\n                                text_.index(b'<EOS>') + 1,\n                                data_batch_['length'][i] - length_inc)\n\n                    max_seq_length = text_data.hparams.dataset.max_seq_length\n                    mode = text_data.hparams.dataset.length_filter_mode\n                    if max_seq_length == 6:\n                        max_l = max_seq_length\n                        max_l += text_data._decoder.added_length\n                        for length in data_batch_['length']:\n                            self.assertLessEqual(length, max_l)\n                        if mode == \"discard\":\n                            for length in data_batch_['length']:\n                                self.assertEqual(length, 5)\n                        elif mode == \"truncate\":\n                            num_length_6 = 0\n                            for length in data_batch_['length']:\n                                num_length_6 += int(length == 6)\n                            self.assertGreater(num_length_6, 0)\n                        else:\n                            raise ValueError(\"Unknown mode: %s\" % mode)\n\n                    if text_data.hparams.dataset.pad_to_max_seq_length:\n  
                      max_l = max_seq_length + text_data._decoder.added_length\n                        for x in data_batch_['text']:\n                            self.assertEqual(len(x), max_l)\n                        for x in data_batch_['text_ids']:\n                            self.assertEqual(len(x), max_l)\n\n                except tf.errors.OutOfRangeError:\n                    print('Done -- epoch limit reached')\n                    break\n\n    def test_default_setting(self):\n        \"\"\"Tests the logics of MonoTextData.\n        \"\"\"\n        self._run_and_test(self._hparams)\n\n    def test_batching(self):\n        \"\"\"Tests different batching.\n        \"\"\"\n        # dis-allow smaller final batch\n        hparams = copy.copy(self._hparams)\n        hparams.update({\"allow_smaller_final_batch\": False})\n        self._run_and_test(hparams, test_batch_size=True)\n\n    def test_bucketing(self):\n        \"\"\"Tests bucketing.\n        \"\"\"\n        hparams = copy.copy(self._hparams)\n        hparams.update({\n            \"bucket_boundaries\": [7],\n            \"bucket_batch_sizes\": [6, 4]})\n\n        text_data = tx.data.MonoTextData(hparams)\n        iterator = text_data.dataset.make_initializable_iterator()\n        text_data_batch = iterator.get_next()\n\n        hparams.update({\n            \"bucket_boundaries\": [7],\n            \"bucket_batch_sizes\": [7, 7],\n            \"allow_smaller_final_batch\": False})\n\n        text_data_1 = tx.data.MonoTextData(hparams)\n        iterator_1 = text_data_1.dataset.make_initializable_iterator()\n        text_data_batch_1 = iterator_1.get_next()\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            sess.run(tf.local_variables_initializer())\n            sess.run(tf.tables_initializer())\n            sess.run(iterator.initializer)\n            sess.run(iterator_1.initializer)\n\n            while True:\n                try:\n        
            # Run the logics\n                    data_batch_, data_batch_1_ = sess.run(\n                        [text_data_batch, text_data_batch_1])\n\n                    length_ = data_batch_['length'][0]\n                    if length_ < 7:\n                        last_batch_size = hparams['num_epochs'] % 6\n                        self.assertTrue(\n                            len(data_batch_['text']) == 6 or\n                            len(data_batch_['text']) == last_batch_size)\n                    else:\n                        last_batch_size = hparams['num_epochs'] % 4\n                        self.assertTrue(\n                            len(data_batch_['text']) == 4 or\n                            len(data_batch_['text']) == last_batch_size)\n\n                    self.assertEqual(len(data_batch_1_['text']), 7)\n\n                except tf.errors.OutOfRangeError:\n                    print('Done -- epoch limit reached')\n                    break\n\n    def test_shuffle(self):\n        \"\"\"Tests different shuffle strategies.\n        \"\"\"\n        hparams = copy.copy(self._hparams)\n        hparams.update({\n            \"shard_and_shuffle\": True,\n            \"shuffle_buffer_size\": 1})\n        self._run_and_test(hparams)\n\n    def test_prefetch(self):\n        \"\"\"Tests prefetching.\n        \"\"\"\n        hparams = copy.copy(self._hparams)\n        hparams.update({\"prefetch_buffer_size\": 2})\n        self._run_and_test(hparams)\n\n    def test_other_transformations(self):\n        \"\"\"Tests use of other transformations\n        \"\"\"\n        def _transform(x, data_specs): # pylint: disable=invalid-name\n            x[data_specs.decoder.length_tensor_name] += 1\n            return x\n\n        hparams = copy.copy(self._hparams)\n        hparams[\"dataset\"].update(\n            {\"other_transformations\": [_transform, _transform]})\n        self._run_and_test(hparams, length_inc=2)\n\n    def test_list_items(self):\n        
\"\"\"Tests the item names of the output data.\n        \"\"\"\n        text_data = tx.data.MonoTextData(self._hparams)\n        self.assertSetEqual(set(text_data.list_items()),\n                            {\"text\", \"text_ids\", \"length\"})\n\n        hparams = copy.copy(self._hparams)\n        hparams[\"dataset\"][\"data_name\"] = \"data\"\n        text_data = tx.data.MonoTextData(hparams)\n        self.assertSetEqual(set(text_data.list_items()),\n                            {\"data_text\", \"data_text_ids\", \"data_length\"})\n\n    def test_length_discard(self):\n        \"\"\"Tests discard lenghy seq.\n        \"\"\"\n        hparams = copy.copy(self._hparams)\n        hparams[\"dataset\"].update({\"max_seq_length\": 4,\n                                   \"length_filter_mode\": \"discard\"})\n        self._run_and_test(hparams)\n\n    def test_length_truncate(self):\n        \"\"\"Tests truncation.\n        \"\"\"\n        hparams = copy.copy(self._hparams)\n        hparams[\"dataset\"].update({\"max_seq_length\": 4,\n                                   \"length_filter_mode\": \"truncate\"})\n        hparams[\"shuffle\"] = False\n        hparams[\"allow_smaller_final_batch\"] = False\n        self._run_and_test(hparams)\n\n    def test_pad_to_max_length(self):\n        \"\"\"Tests padding.\n        \"\"\"\n        hparams = copy.copy(self._hparams)\n        hparams[\"dataset\"].update({\"max_seq_length\": 10,\n                                   \"length_filter_mode\": \"truncate\",\n                                   \"pad_to_max_seq_length\": True})\n        self._run_and_test(hparams)\n\n\nclass VarUttMonoTextDataTest(tf.test.TestCase):\n    \"\"\"Tests variable utterance text data class.\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n\n        # Create test data\n        vocab_list = ['word', 'sentence', '词', 'response', 'dialog', '1', '2']\n        vocab_file = tempfile.NamedTemporaryFile()\n        
vocab_file.write('\\n'.join(vocab_list).encode(\"utf-8\"))\n        vocab_file.flush()\n        self._vocab_file = vocab_file\n        self._vocab_size = len(vocab_list)\n\n        text = [\n            'This is a dialog 1 sentence . ||| This is a dialog 1 sentence . '\n            '||| This is yet another dialog 1 sentence .', #//\n            'This is a dialog 2 sentence . ||| '\n            'This is also a dialog 2 sentence . ', #//\n            '词 词 词 ||| word', #//\n            'This This', #//\n            '1 1 1 ||| 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ||| 1 1 1 ||| 2'\n        ]\n        text_file = tempfile.NamedTemporaryFile()\n        text_file.write('\\n'.join(text).encode(\"utf-8\"))\n        text_file.flush()\n        self._text_file = text_file\n\n        self._hparams = {\n            \"num_epochs\": 50,\n            \"batch_size\": 3,\n            \"shuffle\": False,\n            \"dataset\": {\n                \"files\": self._text_file.name,\n                \"vocab_file\": self._vocab_file.name,\n                \"variable_utterance\": True,\n                \"max_utterance_cnt\": 3,\n                \"max_seq_length\": 10\n            }\n        }\n\n    def _run_and_test(self, hparams):\n        # Construct database\n        text_data = tx.data.MonoTextData(hparams)\n        self.assertEqual(text_data.vocab.size,\n                         self._vocab_size + len(text_data.vocab.special_tokens))\n\n        iterator = text_data.dataset.make_initializable_iterator()\n        text_data_batch = iterator.get_next()\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            sess.run(tf.local_variables_initializer())\n            sess.run(tf.tables_initializer())\n            sess.run(iterator.initializer)\n\n            while True:\n                try:\n                    # Run the logics\n                    data_batch_ = sess.run(text_data_batch)\n\n                    
self.assertEqual(set(data_batch_.keys()),\n                                     set(text_data.list_items()))\n\n                    # Test utterance count\n                    utt_ind = np.sum(data_batch_[\"text_ids\"], 2) != 0\n                    utt_cnt = np.sum(utt_ind, 1)\n                    self.assertListEqual(\n                        data_batch_[text_data.utterance_cnt_name].tolist(),\n                        utt_cnt.tolist())\n\n                    if text_data.hparams.dataset.pad_to_max_seq_length:\n                        max_l = text_data.hparams.dataset.max_seq_length\n                        max_l += text_data._decoder.added_length\n                        for x in data_batch_['text']:\n                            for xx in x:\n                                self.assertEqual(len(xx), max_l)\n                        for x in data_batch_['text_ids']:\n                            for xx in x:\n                                self.assertEqual(len(xx), max_l)\n\n                except tf.errors.OutOfRangeError:\n                    print('Done -- epoch limit reached')\n                    break\n\n    def test_default_setting(self):\n        \"\"\"Tests the logics of the text data.\n        \"\"\"\n        self._run_and_test(self._hparams)\n\n    def test_pad_to_max_length(self):\n        \"\"\"Tests padding.\n        \"\"\"\n        hparams = copy.copy(self._hparams)\n        hparams[\"dataset\"].update({\"max_seq_length\": 20,\n                                   \"length_filter_mode\": \"truncate\",\n                                   \"pad_to_max_seq_length\": True})\n        self._run_and_test(hparams)\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/data/data/multi_aligned_data.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nPaired text data that consists of source text and target text.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport copy\n\nimport tensorflow as tf\n\nfrom texar.hyperparams import HParams\nfrom texar.utils import utils\nfrom texar.utils.dtypes import is_str, is_callable\nfrom texar.data.data.text_data_base import TextDataBase\nfrom texar.data.data.scalar_data import ScalarData\nfrom texar.data.data.mono_text_data import _default_mono_text_dataset_hparams\nfrom texar.data.data.scalar_data import _default_scalar_dataset_hparams\nfrom texar.data.data.mono_text_data import MonoTextData\nfrom texar.data.data_utils import count_file_lines\nfrom texar.data.data import dataset_utils as dsutils\nfrom texar.data.vocabulary import Vocab, SpecialTokens\nfrom texar.data.embedding import Embedding\n\n# pylint: disable=invalid-name, arguments-differ\n# pylint: disable=protected-access, too-many-instance-attributes\n\n__all__ = [\n    \"_default_dataset_hparams\",\n    \"MultiAlignedData\"\n]\n\nclass _DataTypes(object): # pylint: disable=no-init, too-few-public-methods\n    \"\"\"Enumeration of data types.\n    \"\"\"\n    TEXT = \"text\"\n    INT = \"int\"\n    FLOAT = \"float\"\n\ndef _is_text_data(data_type):\n    return 
data_type == _DataTypes.TEXT\ndef _is_scalar_data(data_type):\n    return data_type == _DataTypes.INT or data_type == _DataTypes.FLOAT\n\ndef _default_dataset_hparams(data_type=None):\n    \"\"\"Returns hyperparameters of a dataset with default values.\n\n    See :meth:`texar.data.MultiAlignedData.default_hparams` for details.\n    \"\"\"\n    if not data_type or _is_text_data(data_type):\n        hparams = _default_mono_text_dataset_hparams()\n        hparams.update({\n            \"data_type\": _DataTypes.TEXT,\n            \"vocab_share_with\": None,\n            \"embedding_init_share_with\": None,\n            \"processing_share_with\": None,\n        })\n    elif _is_scalar_data(data_type):\n        hparams = _default_scalar_dataset_hparams()\n    return hparams\n\nclass MultiAlignedData(TextDataBase):\n    \"\"\"Data consisting of multiple aligned parts.\n\n    Args:\n        hparams (dict): Hyperparameters. See :meth:`default_hparams` for the\n            defaults.\n\n    The processor can read any number of parallel fields as specified in\n    the \"datasets\" list of :attr:`hparams`, and result in a TF Dataset whose\n    element is a python `dict` containing data fields from each of the\n    specified datasets. Fields from a text dataset have names prefixed by\n    its \"data_name\". Fields from a scalar dataset are specified by its\n    \"data_name\".\n\n    Example:\n\n        .. 
code-block:: python\n\n            hparams={\n                'datasets': [\n                    {'files': 'a.txt', 'vocab_file': 'v.a', 'data_name': 'x'},\n                    {'files': 'b.txt', 'vocab_file': 'v.b', 'data_name': 'y'},\n                    {'files': 'c.txt', 'data_type': 'int', 'data_name': 'z'}\n                ]\n                'batch_size': 1\n            }\n            data = MultiAlignedData(hparams)\n            iterator = DataIterator(data)\n            batch = iterator.get_next()\n\n            iterator.switch_to_dataset(sess) # initializes the dataset\n            batch_ = sess.run(batch)\n            # batch_ == {\n            #    'x_text': [['<BOS>', 'x', 'sequence', '<EOS>']],\n            #    'x_text_ids': [['1', '5', '10', '2']],\n            #    'x_length': [4]\n            #    'y_text': [['<BOS>', 'y', 'sequence', '1', '<EOS>']],\n            #    'y_text_ids': [['1', '6', '10', '20', '2']],\n            #    'y_length': [5],\n            #    'z': [1000]\n            # }\n\n    \"\"\"\n    def __init__(self, hparams):\n        TextDataBase.__init__(self, hparams)\n        # Defaultizes hparams of each dataset\n        datasets_hparams = self._hparams.datasets\n        defaultized_datasets_hparams = []\n        for ds_hpms in datasets_hparams:\n            data_type = ds_hpms.get(\"data_type\", None)\n            defaultized_ds_hpms = HParams(ds_hpms,\n                                          _default_dataset_hparams(data_type))\n            defaultized_datasets_hparams.append(defaultized_ds_hpms)\n        self._hparams.datasets = defaultized_datasets_hparams\n\n        with tf.name_scope(self.name, self.default_hparams()[\"name\"]):\n            self._make_data()\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dicitionary of default hyperparameters.\n\n        .. 
code-block:: python\n\n            {\n                # (1) Hyperparams specific to text dataset\n                \"datasets\": []\n                # (2) General hyperparams\n                \"num_epochs\": 1,\n                \"batch_size\": 64,\n                \"allow_smaller_final_batch\": True,\n                \"shuffle\": True,\n                \"shuffle_buffer_size\": None,\n                \"shard_and_shuffle\": False,\n                \"num_parallel_calls\": 1,\n                \"prefetch_buffer_size\": 0,\n                \"max_dataset_size\": -1,\n                \"seed\": None,\n                \"name\": \"multi_aligned_data\",\n            }\n\n        Here:\n\n        1. \"datasets\" is a list of `dict` each of which specifies a\n        text or scalar dataset. The :attr:`\"data_name\"` field of each dataset\n        is used as the name prefix of the data fields from the respective\n        dataset. The :attr:`\"data_name\"` field of each dataset should not\n        be the same.\n\n            - For scalar dataset, the allowed hyperparameters and default \\\n            values are the same as the \"dataset\" field of \\\n            :meth:`texar.data.ScalarData.default_hparams`. Note that \\\n            :attr:`\"data_type\"` must be explicily specified \\\n            (either \"int\" or \"float\"). \\\n\n            - For text dataset, the allowed hyperparameters and default values\\\n            are the same as the \"dataset\" filed of \\\n            :meth:`texar.data.MonoTextData.default_hparams`, with several \\\n            extra hyperparameters:\n\n                \"data_type\" : str\n                    The type of the dataset, one of {\"text\", \"int\", \"float\"}.\n                    If set to \"int\" or \"float\", the dataset is considered to be\n                    a scalar dataset. 
If not specified or set to \"text\", the\n                    dataset is considered to be a text dataset.\n\n                \"vocab_share_with\" : int, optional\n                    Share the vocabulary of a preceding text dataset with the\n                    specified index in the list (starting from 0). The\n                    specified dataset must be a text dataset, and must have\n                    an index smaller than the current dataset.\n\n                    If specified, the vocab file of current dataset is ignored.\n                    Default is `None` which disables the vocab sharing.\n\n                \"embedding_init_share_with\": int, optional\n                    Share the embedding initial value of a preceding text\n                    dataset with the specified index in the list (starting\n                    from 0).\n                    The specified dataset must be a text dataset, and must have\n                    an index smaller than the current dataset.\n\n                    If specified, the :attr:`\"embedding_init\"` field of\n                    the current dataset is ignored. Default is `None` which\n                    disables the initial value sharing.\n\n                \"processing_share_with\" : int, optional\n                    Share the processing configurations of a preceding text\n                    dataset with the specified index in the list (starting\n                    from 0).\n                    The specified dataset must be a text dataset, and must have\n                    an index smaller than the current dataset.\n\n                    If specified, relevant field of the current dataset are\n                    ignored, including \"delimiter\", \"bos_token\", \"eos_token\",\n                    and \"other_transformations\". Default is `None` which\n                    disables the processing sharing.\n\n        2. 
For the **general** hyperparameters, see\n        :meth:`texar.data.DataBase.default_hparams` for details.\n        \"\"\"\n        hparams = TextDataBase.default_hparams()\n        hparams[\"name\"] = \"multi_aligned_data\"\n        hparams[\"datasets\"] = []\n        return hparams\n\n    @staticmethod\n    def _raise_sharing_error(err_data, shr_data, hparam_name):\n        raise ValueError(\n            \"Must only share specifications with a preceding dataset. \"\n            \"Dataset %d has '%s=%d'\" % (err_data, hparam_name, shr_data))\n\n    @staticmethod\n    def make_vocab(hparams):\n        \"\"\"Makes a list of vocabs based on the hparams.\n\n        Args:\n            hparams (list): A list of dataset hyperparameters.\n\n        Returns:\n            A list of :class:`texar.data.Vocab` instances. Some instances\n            may be the same objects if they are set to be shared and have\n            the same other configs.\n        \"\"\"\n        if not isinstance(hparams, (list, tuple)):\n            hparams = [hparams]\n\n        vocabs = []\n        for i, hparams_i in enumerate(hparams):\n            if not _is_text_data(hparams_i[\"data_type\"]):\n                vocabs.append(None)\n                continue\n\n            proc_shr = hparams_i[\"processing_share_with\"]\n            if proc_shr is not None:\n                bos_token = hparams[proc_shr][\"bos_token\"]\n                eos_token = hparams[proc_shr][\"eos_token\"]\n            else:\n                bos_token = hparams_i[\"bos_token\"]\n                eos_token = hparams_i[\"eos_token\"]\n            bos_token = utils.default_str(\n                bos_token, SpecialTokens.BOS)\n            eos_token = utils.default_str(\n                eos_token, SpecialTokens.EOS)\n\n            vocab_shr = hparams_i[\"vocab_share_with\"]\n            if vocab_shr is not None:\n                if vocab_shr >= i:\n                    MultiAlignedData._raise_sharing_error(\n                        
i, vocab_shr, \"vocab_share_with\")\n                if not vocabs[vocab_shr]:\n                    raise ValueError(\"Cannot share vocab with dataset %d which \"\n                                     \"does not have a vocab.\" % vocab_shr)\n                if bos_token == vocabs[vocab_shr].bos_token and \\\n                        eos_token == vocabs[vocab_shr].eos_token:\n                    vocab = vocabs[vocab_shr]\n                else:\n                    vocab = Vocab(hparams[vocab_shr][\"vocab_file\"],\n                                  bos_token=bos_token,\n                                  eos_token=eos_token)\n            else:\n                vocab = Vocab(hparams_i[\"vocab_file\"],\n                              bos_token=bos_token,\n                              eos_token=eos_token)\n            vocabs.append(vocab)\n\n        return vocabs\n\n    @staticmethod\n    def make_embedding(hparams, vocabs):\n        \"\"\"Optionally loads embeddings from files (if provided), and\n        returns respective :class:`texar.data.Embedding` instances.\n        \"\"\"\n        if not isinstance(hparams, (list, tuple)):\n            hparams = [hparams]\n\n        embs = []\n        for i, hparams_i in enumerate(hparams):\n            if not _is_text_data(hparams_i[\"data_type\"]):\n                embs.append(None)\n                continue\n\n            emb_shr = hparams_i[\"embedding_init_share_with\"]\n            if emb_shr is not None:\n                if emb_shr >= i:\n                    MultiAlignedData._raise_sharing_error(\n                        i, emb_shr, \"embedding_init_share_with\")\n                if not embs[emb_shr]:\n                    raise ValueError(\"Cannot share embedding with dataset %d \"\n                                     \"which does not have an embedding.\" %\n                                     emb_shr)\n                if emb_shr != hparams_i[\"vocab_share_with\"]:\n                    raise 
ValueError(\"'embedding_init_share_with' != \"\n                                     \"vocab_share_with. embedding_init can \"\n                                     \"be shared only when vocab is shared.\")\n                emb = embs[emb_shr]\n            else:\n                emb = None\n                emb_file = hparams_i[\"embedding_init\"][\"file\"]\n                if emb_file and emb_file != \"\":\n                    emb = Embedding(vocabs[i].token_to_id_map_py,\n                                    hparams_i[\"embedding_init\"])\n            embs.append(emb)\n\n        return embs\n\n    def _make_dataset(self):\n        datasets = []\n        for _, hparams_i in enumerate(self._hparams.datasets):\n            dtype = hparams_i.data_type\n            if _is_text_data(dtype) or _is_scalar_data(dtype):\n                dataset = tf.data.TextLineDataset(\n                    hparams_i.files,\n                    compression_type=hparams_i.compression_type)\n                datasets.append(dataset)\n            else:\n                raise ValueError(\"Unknown data type: %s\" % hparams_i.data_type)\n        return tf.data.Dataset.zip(tuple(datasets))\n\n    #@staticmethod\n    #def _get_name_prefix(dataset_hparams):\n    #    def _dtype_conflict(dtype_1, dtype_2):\n    #        conflict = ((dtype_1 == dtype_2) or\n    #                    (dtype_1 in {_DataTypes.INT, _DataTypes.FLOAT} and\n    #                     dtype_2 in {_DataTypes.INT, _DataTypes.FLOAT}))\n    #        return conflict\n\n    #    name_prefix = [hpms[\"data_name\"] for hpms in dataset_hparams]\n    #    name_prefix_dict = {}\n    #    for i, np in enumerate(name_prefix):\n    #        ids = name_prefix_dict.get(np, [])\n    #        for j in ids:\n    #            if _dtype_conflict(dataset_hparams[j][\"data_type\"],\n    #                               dataset_hparams[i][\"data_type\"]):\n    #                raise ValueError(\n    #                    \"'data_name' of the datasets 
with compatible \"\n    #                    \"data_types cannot be the same: %d-th dataset and \"\n    #                    \"%d-th dataset have the same name '%s'\" %\n    #                    (i, j, name_prefix[i]))\n    #        ids.append(i)\n    #        name_prefix_dict[np] = ids\n    #    return name_prefix\n\n    @staticmethod\n    def _get_name_prefix(dataset_hparams):\n        name_prefix = [hpms[\"data_name\"] for hpms in dataset_hparams]\n        for i in range(1, len(name_prefix)):\n            if name_prefix[i] in name_prefix[:i-1]:\n                raise ValueError(\"Data name duplicated: %s\" % name_prefix[i])\n        return name_prefix\n\n    @staticmethod\n    def _make_processor(dataset_hparams, data_spec, name_prefix):\n        processors = []\n        for i, hparams_i in enumerate(dataset_hparams):\n            data_spec_i = data_spec.get_ith_data_spec(i)\n\n            data_type = hparams_i[\"data_type\"]\n            if _is_text_data(data_type):\n                tgt_proc_hparams = hparams_i\n                proc_shr = hparams_i[\"processing_share_with\"]\n                if proc_shr is not None:\n                    tgt_proc_hparams = copy.copy(dataset_hparams[proc_shr])\n                    try:\n                        tgt_proc_hparams[\"variable_utterance\"] = \\\n                                hparams_i[\"variable_utterance\"]\n                    except TypeError:\n                        tgt_proc_hparams.variable_utterance = \\\n                                hparams_i[\"variable_utterance\"]\n\n                processor, data_spec_i = MonoTextData._make_processor(\n                    tgt_proc_hparams, data_spec_i)\n            elif _is_scalar_data(data_type):\n                processor, data_spec_i = ScalarData._make_processor(\n                    hparams_i, data_spec_i, name_prefix='')\n            else:\n                raise ValueError(\"Unsupported data type: %s\" % data_type)\n\n            processors.append(processor)\n     
       data_spec.set_ith_data_spec(i, data_spec_i, len(dataset_hparams))\n\n        tran_fn = dsutils.make_combined_transformation(\n            processors, name_prefix=name_prefix)\n\n        data_spec.add_spec(name_prefix=name_prefix)\n\n        return tran_fn, data_spec\n\n    @staticmethod\n    def _make_length_filter(dataset_hparams, length_name, decoder):\n        filter_fns = []\n        for i, hpms in enumerate(dataset_hparams):\n            if not _is_text_data(hpms[\"data_type\"]):\n                filter_fn = None\n            else:\n                filter_fn = MonoTextData._make_length_filter(\n                    hpms, length_name[i], decoder[i])\n            filter_fns.append(filter_fn)\n        combined_filter_fn = dsutils._make_combined_filter_fn(filter_fns)\n        return combined_filter_fn\n\n    def _process_dataset(self, dataset, hparams, data_spec):\n        name_prefix = self._get_name_prefix(hparams[\"datasets\"])\n        # pylint: disable=attribute-defined-outside-init\n        self._name_to_id = {v:k for k, v in enumerate(name_prefix)}\n\n        tran_fn, data_spec = self._make_processor(\n            hparams[\"datasets\"], data_spec, name_prefix)\n\n        num_parallel_calls = hparams[\"num_parallel_calls\"]\n        dataset = dataset.map(\n            lambda *args: tran_fn(dsutils.maybe_tuple(args)),\n            num_parallel_calls=num_parallel_calls)\n\n        # Filters by length\n        def _get_length_name(i):\n            if not _is_text_data(hparams[\"datasets\"][i][\"data_type\"]):\n                return None\n            name = dsutils._connect_name(\n                data_spec.name_prefix[i],\n                data_spec.decoder[i].length_tensor_name)\n            return name\n        filter_fn = self._make_length_filter(\n            hparams[\"datasets\"],\n            [_get_length_name(i) for i in range(len(hparams[\"datasets\"]))],\n            data_spec.decoder)\n        if filter_fn:\n            dataset = 
dataset.filter(filter_fn)\n\n        # Truncates data count\n        dataset = dataset.take(hparams[\"max_dataset_size\"])\n\n        return dataset, data_spec\n\n    def _make_bucket_length_fn(self):\n        length_fn = self._hparams.bucket_length_fn\n        if not length_fn:\n            # Uses the length of the first text data\n            i = -1\n            for i, hparams_i in enumerate(self._hparams.datasets):\n                if _is_text_data(hparams_i[\"data_type\"]):\n                    break\n            if i < 0:\n                raise ValueError(\"Undefined `length_fn`.\")\n            length_fn = lambda x: x[self.length_name(i)]\n        elif not is_callable(length_fn):\n            # pylint: disable=redefined-variable-type\n            length_fn = utils.get_function(length_fn, [\"texar.custom\"])\n        return length_fn\n\n    def _make_padded_shapes(self, dataset, decoders):\n        padded_shapes = dataset.output_shapes\n        for i, hparams_i in enumerate(self._hparams.datasets):\n            if not _is_text_data(hparams_i[\"data_type\"]):\n                continue\n            if not hparams_i[\"pad_to_max_seq_length\"]:\n                continue\n            text_and_id_shapes = MonoTextData._make_padded_text_and_id_shapes(\n                dataset, hparams_i, decoders[i],\n                self.text_name(i), self.text_id_name(i))\n\n            padded_shapes.update(text_and_id_shapes)\n\n        return padded_shapes\n\n    def _make_data(self):\n        self._vocab = self.make_vocab(self._hparams.datasets)\n        self._embedding = self.make_embedding(self._hparams.datasets,\n                                              self._vocab)\n\n        # Create dataset\n        dataset = self._make_dataset()\n        dataset, dataset_size = self._shuffle_dataset(\n            dataset, self._hparams, self._hparams.datasets[0].files)\n        self._dataset_size = dataset_size\n\n        # Processing\n        data_spec = 
dsutils._DataSpec(dataset=dataset,\n                                      dataset_size=self._dataset_size,\n                                      vocab=self._vocab,\n                                      embedding=self._embedding)\n        dataset, data_spec = self._process_dataset(\n            dataset, self._hparams, data_spec)\n        self._data_spec = data_spec\n        self._decoder = data_spec.decoder\n\n        # Batching\n        length_fn = self._make_bucket_length_fn()\n        padded_shapes = self._make_padded_shapes(dataset, self._decoder)\n        dataset = self._make_batch(\n            dataset, self._hparams, length_fn, padded_shapes)\n\n        # Prefetching\n        if self._hparams.prefetch_buffer_size > 0:\n            dataset = dataset.prefetch(self._hparams.prefetch_buffer_size)\n\n        self._dataset = dataset\n\n\n    def list_items(self):\n        \"\"\"Returns the list of item names that the data can produce.\n\n        Returns:\n            A list of strings.\n        \"\"\"\n        return list(self._dataset.output_types.keys())\n\n    @property\n    def dataset(self):\n        \"\"\"The dataset.\n        \"\"\"\n        return self._dataset\n\n    def dataset_size(self):\n        \"\"\"Returns the number of data instances in the dataset.\n\n        Note that this is the total data count in the raw files, before any\n        filtering and truncation.\n        \"\"\"\n        if not self._dataset_size:\n            # pylint: disable=attribute-defined-outside-init\n            self._dataset_size = count_file_lines(\n                self._hparams.datasets[0].files)\n        return self._dataset_size\n\n    def _maybe_name_to_id(self, name_or_id):\n        if is_str(name_or_id):\n            if name_or_id not in self._name_to_id:\n                raise ValueError(\"Unknown data name: {}\".format(name_or_id))\n            return self._name_to_id[name_or_id]\n        return name_or_id\n\n    def vocab(self, name_or_id):\n        
\"\"\"Returns the :class:`~texar.data.Vocab` of text dataset by its name\n        or id. `None` if the dataset is not of text type.\n\n        Args:\n            name_or_id (str or int): Data name or the index of text dataset.\n        \"\"\"\n        i = self._maybe_name_to_id(name_or_id)\n        return self._vocab[i]\n\n    def embedding_init_value(self, name_or_id):\n        \"\"\"Returns the `Tensor` of embedding init value of the\n        dataset by its name or id. `None` if the dataset is not of text type.\n        \"\"\"\n        i = self._maybe_name_to_id(name_or_id)\n        return self._embedding[i]\n\n    def text_name(self, name_or_id):\n        \"\"\"The name of text tensor of text dataset by its name or id. If the\n        dataaet is not of text type, returns `None`.\n        \"\"\"\n        i = self._maybe_name_to_id(name_or_id)\n        if not _is_text_data(self._hparams.datasets[i][\"data_type\"]):\n            return None\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[i],\n            self._data_spec.decoder[i].text_tensor_name)\n        return name\n\n    def length_name(self, name_or_id):\n        \"\"\"The name of length tensor of text dataset by its name or id. If the\n        dataset is not of text type, returns `None`.\n        \"\"\"\n        i = self._maybe_name_to_id(name_or_id)\n        if not _is_text_data(self._hparams.datasets[i][\"data_type\"]):\n            return None\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[i],\n            self._data_spec.decoder[i].length_tensor_name)\n        return name\n\n    def text_id_name(self, name_or_id):\n        \"\"\"The name of length tensor of text dataset by its name or id. 
If the\n        dataset is not of text type, returns `None`.\n        \"\"\"\n        i = self._maybe_name_to_id(name_or_id)\n        if not _is_text_data(self._hparams.datasets[i][\"data_type\"]):\n            return None\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[i],\n            self._data_spec.decoder[i].text_id_tensor_name)\n        return name\n\n    def utterance_cnt_name(self, name_or_id):\n        \"\"\"The name of utterance count tensor of text dataset by its name or id.\n        If the dataset is not variable utterance text data, returns `None`.\n        \"\"\"\n        i = self._maybe_name_to_id(name_or_id)\n        if not _is_text_data(self._hparams.datasets[i][\"data_type\"]) or \\\n                not self._hparams.datasets[i][\"variable_utterance\"]:\n            return None\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[i],\n            self._data_spec.decoder[i].utterance_cnt_tensor_name)\n        return name\n\n    @property\n    def data_name(self, name_or_id):\n        \"\"\"The name of the data tensor of scalar dataset by its name or id..\n        If the dataset is not a scalar data, returns `None`.\n        \"\"\"\n        i = self._maybe_name_to_id(name_or_id)\n        if not _is_scalar_data(self._hparams.datasets[i][\"data_type\"]):\n            return None\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[i],\n            self._data_spec.decoder[i].data_tensor_name)\n        return name\n\n"
  },
  {
    "path": "texar_repo/texar/data/data/multi_aligned_data_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for data related operations.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tempfile\nimport copy\nimport numpy as np\n\nimport tensorflow as tf\n\nimport texar as tx\n\n# pylint: disable=too-many-locals, too-many-branches, protected-access\n\nclass MultiAlignedDataTest(tf.test.TestCase):\n    \"\"\"Tests multi aligned text data class.\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n\n        # Create test data\n        vocab_list = ['This', 'is', 'a', 'word', '词']\n        vocab_file = tempfile.NamedTemporaryFile()\n        vocab_file.write('\\n'.join(vocab_list).encode(\"utf-8\"))\n        vocab_file.flush()\n        self._vocab_file = vocab_file\n        self._vocab_size = len(vocab_list)\n\n        text_0 = ['This is a sentence from source .', '词 词 。 source']\n        text_0_file = tempfile.NamedTemporaryFile()\n        text_0_file.write('\\n'.join(text_0).encode(\"utf-8\"))\n        text_0_file.flush()\n        self._text_0_file = text_0_file\n\n        text_1 = ['This is a sentence from target .', '词 词 。 target']\n        text_1_file = tempfile.NamedTemporaryFile()\n        text_1_file.write('\\n'.join(text_1).encode(\"utf-8\"))\n        text_1_file.flush()\n        self._text_1_file = text_1_file\n\n        text_2 = [\n            'This is a sentence from dialog . 
||| dialog ',\n            '词 词 。 ||| 词 dialog']\n        text_2_file = tempfile.NamedTemporaryFile()\n        text_2_file.write('\\n'.join(text_2).encode(\"utf-8\"))\n        text_2_file.flush()\n        self._text_2_file = text_2_file\n\n        int_3 = [0, 1]\n        int_3_file = tempfile.NamedTemporaryFile()\n        int_3_file.write(('\\n'.join([str(_) for _ in int_3])).encode(\"utf-8\"))\n        int_3_file.flush()\n        self._int_3_file = int_3_file\n\n        # Construct database\n        self._hparams = {\n            \"num_epochs\": 123,\n            \"batch_size\": 23,\n            \"datasets\": [\n                { # dataset 0\n                    \"files\": [self._text_0_file.name],\n                    \"vocab_file\": self._vocab_file.name,\n                    \"bos_token\": \"\",\n                    \"data_name\": \"0\"\n                },\n                { # dataset 1\n                    \"files\": [self._text_1_file.name],\n                    \"vocab_share_with\": 0,\n                    \"eos_token\": \"<TARGET_EOS>\",\n                    \"data_name\": \"1\"\n                },\n                { # dataset 2\n                    \"files\": [self._text_2_file.name],\n                    \"vocab_file\": self._vocab_file.name,\n                    \"processing_share_with\": 0,\n                    \"variable_utterance\": True,\n                    \"data_name\": \"2\"\n                },\n                { # dataset 3\n                    \"files\": self._int_3_file.name,\n                    \"data_type\": \"int\",\n                    \"data_name\": \"label\"\n                },\n            ]\n        }\n\n    def _run_and_test(self, hparams, discard_did=None):\n        # Construct database\n        text_data = tx.data.MultiAlignedData(hparams)\n        self.assertEqual(\n            text_data.vocab(0).size,\n            self._vocab_size + len(text_data.vocab(0).special_tokens))\n\n        iterator = 
text_data.dataset.make_initializable_iterator()\n        text_data_batch = iterator.get_next()\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            sess.run(tf.local_variables_initializer())\n            sess.run(tf.tables_initializer())\n            sess.run(iterator.initializer)\n\n            while True:\n                try:\n                    # Run the logics\n                    data_batch_ = sess.run(text_data_batch)\n\n                    self.assertEqual(set(data_batch_.keys()),\n                                     set(text_data.list_items()))\n                    self.assertEqual(text_data.utterance_cnt_name('2'),\n                                     '2_utterance_cnt')\n                    text_0 = data_batch_['0_text']\n                    text_1 = data_batch_['1_text']\n                    text_2 = data_batch_['2_text']\n                    int_3 = data_batch_['label']\n                    # pylint: disable=invalid-name\n                    for t0, t1, t2, i3 in zip(text_0, text_1, text_2, int_3):\n                        np.testing.assert_array_equal(\n                            t0[:2], t1[1:3])\n                        np.testing.assert_array_equal(\n                            t0[:3], t2[0][:3])\n                        if t0[0].startswith(b'This'):\n                            self.assertEqual(i3, 0)\n                        else:\n                            self.assertEqual(i3, 1)\n\n                    if discard_did is not None:\n                        hpms = text_data._hparams.datasets[discard_did]\n                        max_l = hpms.max_seq_length\n                        max_l += text_data._decoder[discard_did].added_length\n                        for i in range(2):\n                            for length in data_batch_[text_data.length_name(i)]:\n                                self.assertLessEqual(length, max_l)\n                        for lengths in 
data_batch_[text_data.length_name(2)]:\n                            for length in lengths:\n                                self.assertLessEqual(length, max_l)\n                    for i, hpms in enumerate(text_data._hparams.datasets):\n                        if hpms.data_type != \"text\":\n                            continue\n                        max_l = hpms.max_seq_length\n                        mode = hpms.length_filter_mode\n                        if max_l is not None and mode == \"truncate\":\n                            max_l += text_data._decoder[i].added_length\n                            for length in data_batch_[text_data.length_name(i)]:\n                                self.assertLessEqual(length, max_l)\n\n                except tf.errors.OutOfRangeError:\n                    print('Done -- epoch limit reached')\n                    break\n\n    def test_default_setting(self):\n        \"\"\"Tests the logics of the text data.\n        \"\"\"\n        self._run_and_test(self._hparams)\n\n    def test_length_filter(self):\n        \"\"\"Tests filtering by length.\n        \"\"\"\n        hparams = copy.copy(self._hparams)\n        hparams[\"datasets\"][0].update(\n            {\"max_seq_length\": 4,\n             \"length_filter_mode\": \"discard\"})\n        hparams[\"datasets\"][1].update(\n            {\"max_seq_length\": 2,\n             \"length_filter_mode\": \"truncate\"})\n        self._run_and_test(hparams, discard_did=0)\n\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/data/data/paired_text_data.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nPaired text data that consists of source text and target text.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport copy\n\nimport tensorflow as tf\n\nfrom texar.utils import utils\nfrom texar.utils.dtypes import is_callable\nfrom texar.data.data.mono_text_data import _default_mono_text_dataset_hparams\nfrom texar.data.data.text_data_base import TextDataBase\nfrom texar.data.data.mono_text_data import MonoTextData\nfrom texar.data.data_utils import count_file_lines\nfrom texar.data.data import dataset_utils as dsutils\nfrom texar.data.vocabulary import Vocab, SpecialTokens\nfrom texar.data.embedding import Embedding\n\n# pylint: disable=invalid-name, arguments-differ, not-context-manager\n# pylint: disable=protected-access, too-many-arguments\n\n__all__ = [\n    \"_default_paired_text_dataset_hparams\",\n    \"PairedTextData\"\n]\n\ndef _default_paired_text_dataset_hparams():\n    \"\"\"Returns hyperparameters of a paired text dataset with default values.\n\n    See :meth:`texar.data.PairedTextData.default_hparams` for details.\n    \"\"\"\n    source_hparams = _default_mono_text_dataset_hparams()\n    source_hparams[\"bos_token\"] = None\n    source_hparams[\"data_name\"] = \"source\"\n    target_hparams = 
_default_mono_text_dataset_hparams()\n    target_hparams.update(\n        {\n            \"vocab_share\": False,\n            \"embedding_init_share\": False,\n            \"processing_share\": False,\n            \"data_name\": \"target\"\n        }\n    )\n    return {\n        \"source_dataset\": source_hparams,\n        \"target_dataset\": target_hparams\n    }\n\n# pylint: disable=too-many-instance-attributes, too-many-public-methods\nclass PairedTextData(TextDataBase):\n    \"\"\"Text data processor that reads parallel source and target text.\n    This can be used in, e.g., seq2seq models.\n\n    Args:\n        hparams (dict): Hyperparameters. See :meth:`default_hparams` for the\n            defaults.\n\n    By default, the processor reads raw data files, performs tokenization,\n    batching and other pre-processing steps, and results in a TF Dataset\n    whose element is a python `dict` including six fields:\n\n        - \"source_text\":\n            A string Tensor of shape `[batch_size, max_time]` containing\n            the **raw** text toknes of source sequences. `max_time` is the\n            length of the longest sequence in the batch.\n            Short sequences in the batch are padded with **empty string**.\n            By default only EOS token is appended to each sequence.\n            Out-of-vocabulary tokens are **NOT** replaced with UNK.\n        - \"source_text_ids\":\n            An `int64` Tensor of shape `[batch_size, max_time]`\n            containing the token indexes of source sequences.\n        - \"source_length\":\n            An `int` Tensor of shape `[batch_size]` containing the\n            length of each source sequence in the batch (including BOS and/or\n            EOS if added).\n        - \"target_text\":\n            A string Tensor as \"source_text\" but for target sequences. 
By\n            default both BOS and EOS are added.\n        - \"target_text_ids\":\n            An `int64` Tensor as \"source_text_ids\" but for target sequences.\n        - \"target_length\":\n            An `int` Tensor of shape `[batch_size]` as \"source_length\" but for\n            target sequences.\n\n    If :attr:`'variable_utterance'` is set to `True` in :attr:`'source_dataset'`\n    and/or :attr:`'target_dataset'` of :attr:`hparams`, the corresponding\n    fields \"source_*\" and/or \"target_*\" are respectively changed to contain\n    variable utterance text data, as in :class:`~texar.data.MonoTextData`.\n\n    The above field names can be accessed through :attr:`source_text_name`,\n    :attr:`source_text_id_name`, :attr:`source_length_name`,\n    :attr:`source_utterance_cnt_name`, and those prefixed with `target_`,\n    respectively.\n\n    Example:\n\n        .. code-block:: python\n\n            hparams={\n                'source_dataset': {'files': 's', 'vocab_file': 'vs'},\n                'target_dataset': {'files': ['t1', 't2'], 'vocab_file': 'vt'},\n                'batch_size': 1\n            }\n            data = PairedTextData(hparams)\n            iterator = DataIterator(data)\n            batch = iterator.get_next()\n\n            iterator.switch_to_dataset(sess) # initializes the dataset\n            batch_ = sess.run(batch)\n            # batch_ == {\n            #    'source_text': [['source', 'sequence', '<EOS>']],\n            #    'source_text_ids': [[5, 10, 2]],\n            #    'source_length': [3]\n            #    'target_text': [['<BOS>', 'target', 'sequence', '1', '<EOS>']],\n            #    'target_text_ids': [[1, 6, 10, 20, 2]],\n            #    'target_length': [5]\n            # }\n\n    \"\"\"\n    def __init__(self, hparams):\n        TextDataBase.__init__(self, hparams)\n        with tf.name_scope(self.name, self.default_hparams()[\"name\"]):\n            self._make_data()\n\n    @staticmethod\n    def 
default_hparams():\n        \"\"\"Returns a dicitionary of default hyperparameters.\n\n        .. code-block:: python\n\n            {\n                # (1) Hyperparams specific to text dataset\n                \"source_dataset\": {\n                    \"files\": [],\n                    \"compression_type\": None,\n                    \"vocab_file\": \"\",\n                    \"embedding_init\": {},\n                    \"delimiter\": \" \",\n                    \"max_seq_length\": None,\n                    \"length_filter_mode\": \"truncate\",\n                    \"pad_to_max_seq_length\": False,\n                    \"bos_token\": None,\n                    \"eos_token\": \"<EOS>\",\n                    \"other_transformations\": [],\n                    \"variable_utterance\": False,\n                    \"utterance_delimiter\": \"|||\",\n                    \"max_utterance_cnt\": 5,\n                    \"data_name\": \"source\",\n                },\n                \"target_dataset\": {\n                    # ...\n                    # Same fields are allowed as in \"source_dataset\" with the\n                    # same default values, except the\n                    # following new fields/values:\n                    \"bos_token\": \"<BOS>\"\n                    \"vocab_share\": False,\n                    \"embedding_init_share\": False,\n                    \"processing_share\": False,\n                    \"data_name\": \"target\"\n                }\n                # (2) General hyperparams\n                \"num_epochs\": 1,\n                \"batch_size\": 64,\n                \"allow_smaller_final_batch\": True,\n                \"shuffle\": True,\n                \"shuffle_buffer_size\": None,\n                \"shard_and_shuffle\": False,\n                \"num_parallel_calls\": 1,\n                \"prefetch_buffer_size\": 0,\n                \"max_dataset_size\": -1,\n                \"seed\": None,\n                \"name\": 
\"paired_text_data\",\n                # (3) Bucketing\n                \"bucket_boundaries\": [],\n                \"bucket_batch_sizes\": None,\n                \"bucket_length_fn\": None,\n            }\n\n        Here:\n\n        1. Hyperparameters in the :attr:`\"source_dataset\"` and\n        attr:`\"target_dataset\"` fields have the same definition as those\n        in :meth:`texar.data.MonoTextData.default_hparams`, for source and\n        target text, respectively.\n\n        For the new hyperparameters in \"target_dataset\":\n\n            \"vocab_share\" : bool\n                Whether to share the vocabulary of source.\n                If `True`, the vocab file of target is ignored.\n\n            \"embedding_init_share\" : bool\n                Whether to share the embedding initial value of source. If\n                `True`, :attr:`\"embedding_init\"` of target is ignored.\n\n                :attr:`\"vocab_share\"` must be true to share the embedding\n                initial value.\n\n            \"processing_share\" : bool\n                Whether to share the processing configurations of source,\n                including\n                \"delimiter\", \"bos_token\", \"eos_token\", and\n                \"other_transformations\".\n\n        2. For the **general** hyperparameters, see\n        :meth:`texar.data.DataBase.default_hparams` for details.\n\n        3. 
For **bucketing** hyperparameters, see\n        :meth:`texar.data.MonoTextData.default_hparams` for details, except\n        that the default bucket_length_fn is the maximum sequence length\n        of source and target sequences.\n\n        \"\"\"\n        hparams = TextDataBase.default_hparams()\n        hparams[\"name\"] = \"paired_text_data\"\n        hparams.update(_default_paired_text_dataset_hparams())\n        return hparams\n\n    @staticmethod\n    def make_vocab(src_hparams, tgt_hparams):\n        \"\"\"Reads vocab files and returns source vocab and target vocab.\n\n        Args:\n            src_hparams (dict or HParams): Hyperparameters of source dataset.\n            tgt_hparams (dict or HParams): Hyperparameters of target dataset.\n\n        Returns:\n            A pair of :class:`texar.data.Vocab` instances. The two instances\n            may be the same objects if source and target vocabs are shared\n            and have the same other configs.\n        \"\"\"\n        src_vocab = MonoTextData.make_vocab(src_hparams)\n\n        if tgt_hparams[\"processing_share\"]:\n            tgt_bos_token = src_hparams[\"bos_token\"]\n            tgt_eos_token = src_hparams[\"eos_token\"]\n        else:\n            tgt_bos_token = tgt_hparams[\"bos_token\"]\n            tgt_eos_token = tgt_hparams[\"eos_token\"]\n        tgt_bos_token = utils.default_str(tgt_bos_token,\n                                          SpecialTokens.BOS)\n        tgt_eos_token = utils.default_str(tgt_eos_token,\n                                          SpecialTokens.EOS)\n        if tgt_hparams[\"vocab_share\"]:\n            if tgt_bos_token == src_vocab.bos_token and \\\n                    tgt_eos_token == src_vocab.eos_token:\n                tgt_vocab = src_vocab\n            else:\n                tgt_vocab = Vocab(src_hparams[\"vocab_file\"],\n                                  bos_token=tgt_bos_token,\n                                  eos_token=tgt_eos_token)\n        else:\n   
         tgt_vocab = Vocab(tgt_hparams[\"vocab_file\"],\n                              bos_token=tgt_bos_token,\n                              eos_token=tgt_eos_token)\n\n        return src_vocab, tgt_vocab\n\n\n    @staticmethod\n    def make_embedding(src_emb_hparams, src_token_to_id_map,\n                       tgt_emb_hparams=None, tgt_token_to_id_map=None,\n                       emb_init_share=False):\n        \"\"\"Optionally loads source and target embeddings from files\n        (if provided), and returns respective :class:`texar.data.Embedding`\n        instances.\n        \"\"\"\n        src_embedding = MonoTextData.make_embedding(src_emb_hparams,\n                                                    src_token_to_id_map)\n\n        if emb_init_share:\n            tgt_embedding = src_embedding\n        else:\n            tgt_emb_file = tgt_emb_hparams[\"file\"]\n            tgt_embedding = None\n            if tgt_emb_file is not None and tgt_emb_file != \"\":\n                tgt_embedding = Embedding(tgt_token_to_id_map, tgt_emb_hparams)\n\n        return src_embedding, tgt_embedding\n\n    def _make_dataset(self):\n        src_dataset = tf.data.TextLineDataset(\n            self._hparams.source_dataset.files,\n            compression_type=self._hparams.source_dataset.compression_type)\n        tgt_dataset = tf.data.TextLineDataset(\n            self._hparams.target_dataset.files,\n            compression_type=self._hparams.target_dataset.compression_type)\n        return tf.data.Dataset.zip((src_dataset, tgt_dataset))\n\n    @staticmethod\n    def _get_name_prefix(src_hparams, tgt_hparams):\n        name_prefix = [\n            src_hparams[\"data_name\"], tgt_hparams[\"data_name\"]]\n        if name_prefix[0] == name_prefix[1]:\n            raise ValueError(\"'data_name' of source and target \"\n                             \"datasets cannot be the same.\")\n        return name_prefix\n\n    @staticmethod\n    def _make_processor(src_hparams, 
tgt_hparams, data_spec, name_prefix):\n        # Create source data decoder\n        data_spec_i = data_spec.get_ith_data_spec(0)\n        src_decoder, src_trans, data_spec_i = MonoTextData._make_processor(\n            src_hparams, data_spec_i, chained=False)\n        data_spec.set_ith_data_spec(0, data_spec_i, 2)\n\n        # Create target data decoder\n        tgt_proc_hparams = tgt_hparams\n        if tgt_hparams[\"processing_share\"]:\n            tgt_proc_hparams = copy.copy(src_hparams)\n            try:\n                tgt_proc_hparams[\"variable_utterance\"] = \\\n                        tgt_hparams[\"variable_utterance\"]\n            except TypeError:\n                tgt_proc_hparams.variable_utterance = \\\n                        tgt_hparams[\"variable_utterance\"]\n        data_spec_i = data_spec.get_ith_data_spec(1)\n        tgt_decoder, tgt_trans, data_spec_i = MonoTextData._make_processor(\n            tgt_proc_hparams, data_spec_i, chained=False)\n        data_spec.set_ith_data_spec(1, data_spec_i, 2)\n\n        tran_fn = dsutils.make_combined_transformation(\n            [[src_decoder] + src_trans, [tgt_decoder] + tgt_trans],\n            name_prefix=name_prefix)\n\n        data_spec.add_spec(name_prefix=name_prefix)\n\n        return tran_fn, data_spec\n\n    @staticmethod\n    def _make_length_filter(src_hparams, tgt_hparams,\n                            src_length_name, tgt_length_name,\n                            src_decoder, tgt_decoder):\n        src_filter_fn = MonoTextData._make_length_filter(\n            src_hparams, src_length_name, src_decoder)\n        tgt_filter_fn = MonoTextData._make_length_filter(\n            tgt_hparams, tgt_length_name, tgt_decoder)\n        combined_filter_fn = dsutils._make_combined_filter_fn(\n            [src_filter_fn, tgt_filter_fn])\n        return combined_filter_fn\n\n    def _process_dataset(self, dataset, hparams, data_spec):\n        name_prefix = PairedTextData._get_name_prefix(\n            
hparams[\"source_dataset\"], hparams[\"target_dataset\"])\n        tran_fn, data_spec = self._make_processor(\n            hparams[\"source_dataset\"], hparams[\"target_dataset\"],\n            data_spec, name_prefix=name_prefix)\n\n        num_parallel_calls = hparams[\"num_parallel_calls\"]\n        dataset = dataset.map(\n            lambda *args: tran_fn(dsutils.maybe_tuple(args)),\n            num_parallel_calls=num_parallel_calls)\n\n        # Filters by length\n        src_length_name = dsutils._connect_name(\n            data_spec.name_prefix[0],\n            data_spec.decoder[0].length_tensor_name)\n        tgt_length_name = dsutils._connect_name(\n            data_spec.name_prefix[1],\n            data_spec.decoder[1].length_tensor_name)\n        filter_fn = self._make_length_filter(\n            hparams[\"source_dataset\"], hparams[\"target_dataset\"],\n            src_length_name, tgt_length_name,\n            data_spec.decoder[0], data_spec.decoder[1])\n        if filter_fn:\n            dataset = dataset.filter(filter_fn)\n\n        # Truncates data count\n        dataset = dataset.take(hparams[\"max_dataset_size\"])\n\n        return dataset, data_spec\n\n    def _make_bucket_length_fn(self):\n        length_fn = self._hparams.bucket_length_fn\n        if not length_fn:\n            length_fn = lambda x: tf.maximum(\n                x[self.source_length_name], x[self.target_length_name])\n        elif not is_callable(length_fn):\n            # pylint: disable=redefined-variable-type\n            length_fn = utils.get_function(length_fn, [\"texar.custom\"])\n        return length_fn\n\n    def _make_padded_shapes(self, dataset, src_decoder, tgt_decoder):\n        src_text_and_id_shapes = {}\n        if self._hparams.source_dataset.pad_to_max_seq_length:\n            src_text_and_id_shapes = \\\n                    MonoTextData._make_padded_text_and_id_shapes(\n                        dataset, self._hparams.source_dataset, src_decoder,\n                
        self.source_text_name, self.source_text_id_name)\n\n        tgt_text_and_id_shapes = {}\n        if self._hparams.target_dataset.pad_to_max_seq_length:\n            tgt_text_and_id_shapes = \\\n                    MonoTextData._make_padded_text_and_id_shapes(\n                        dataset, self._hparams.target_dataset, tgt_decoder,\n                        self.target_text_name, self.target_text_id_name)\n\n        padded_shapes = dataset.output_shapes\n        padded_shapes.update(src_text_and_id_shapes)\n        padded_shapes.update(tgt_text_and_id_shapes)\n\n        return padded_shapes\n\n    def _make_data(self):\n        self._src_vocab, self._tgt_vocab = self.make_vocab(\n            self._hparams.source_dataset, self._hparams.target_dataset)\n\n        tgt_hparams = self._hparams.target_dataset\n        if not tgt_hparams.vocab_share and tgt_hparams.embedding_init_share:\n            raise ValueError(\"embedding_init can be shared only when vocab \"\n                             \"is shared. 
Got `vocab_share=False, \"\n                             \"emb_init_share=True`.\")\n        self._src_embedding, self._tgt_embedding = self.make_embedding(\n            self._hparams.source_dataset.embedding_init,\n            self._src_vocab.token_to_id_map_py,\n            self._hparams.target_dataset.embedding_init,\n            self._tgt_vocab.token_to_id_map_py,\n            self._hparams.target_dataset.embedding_init_share)\n\n        # Create dataset\n        dataset = self._make_dataset()\n        dataset, dataset_size = self._shuffle_dataset(\n            dataset, self._hparams, self._hparams.source_dataset.files)\n        self._dataset_size = dataset_size\n\n        # Processing.\n        data_spec = dsutils._DataSpec(\n            dataset=dataset, dataset_size=self._dataset_size,\n            vocab=[self._src_vocab, self._tgt_vocab],\n            embedding=[self._src_embedding, self._tgt_embedding])\n        dataset, data_spec = self._process_dataset(\n            dataset, self._hparams, data_spec)\n        self._data_spec = data_spec\n        self._decoder = data_spec.decoder\n        self._src_decoder = data_spec.decoder[0]\n        self._tgt_decoder = data_spec.decoder[1]\n\n        # Batching\n        length_fn = self._make_bucket_length_fn()\n        padded_shapes = self._make_padded_shapes(\n            dataset, self._src_decoder, self._tgt_decoder)\n        dataset = self._make_batch(\n            dataset, self._hparams, length_fn, padded_shapes)\n\n        # Prefetching\n        if self._hparams.prefetch_buffer_size > 0:\n            dataset = dataset.prefetch(self._hparams.prefetch_buffer_size)\n\n        self._dataset = dataset\n\n    def list_items(self):\n        \"\"\"Returns the list of item names that the data can produce.\n\n        Returns:\n            A list of strings.\n        \"\"\"\n        return list(self._dataset.output_types.keys())\n\n    @property\n    def dataset(self):\n        \"\"\"The dataset.\n        \"\"\"\n        
return self._dataset\n\n    def dataset_size(self):\n        \"\"\"Returns the number of data instances in the dataset.\n\n        Note that this is the total data count in the raw files, before any\n        filtering and truncation.\n        \"\"\"\n        if not self._dataset_size:\n            # pylint: disable=attribute-defined-outside-init\n            self._dataset_size = count_file_lines(\n                self._hparams.source_dataset.files)\n        return self._dataset_size\n\n    @property\n    def vocab(self):\n        \"\"\"A pair instances of :class:`~texar.data.Vocab` that are source\n        and target vocabs, respectively.\n        \"\"\"\n        return self._src_vocab, self._tgt_vocab\n\n    @property\n    def source_vocab(self):\n        \"\"\"The source vocab, an instance of :class:`~texar.data.Vocab`.\n        \"\"\"\n        return self._src_vocab\n\n    @property\n    def target_vocab(self):\n        \"\"\"The target vocab, an instance of :class:`~texar.data.Vocab`.\n        \"\"\"\n        return self._tgt_vocab\n\n    @property\n    def source_embedding_init_value(self):\n        \"\"\"The `Tensor` containing the embedding value of source data\n        loaded from file. `None` if embedding is not specified.\n        \"\"\"\n        if self._src_embedding is None:\n            return None\n        return self._src_embedding.word_vecs\n\n    @property\n    def target_embedding_init_value(self):\n        \"\"\"The `Tensor` containing the embedding value of target data\n        loaded from file. 
`None` if embedding is not specified.\n        \"\"\"\n        if self._tgt_embedding is None:\n            return None\n        return self._tgt_embedding.word_vecs\n\n    def embedding_init_value(self):\n        \"\"\"A pair of `Tensor` containing the embedding values of source and\n        target data loaded from file.\n        \"\"\"\n        src_emb = self.source_embedding_init_value\n        tgt_emb = self.target_embedding_init_value\n        return src_emb, tgt_emb\n\n    @property\n    def source_text_name(self):\n        \"\"\"The name of the source text tensor, \"source_text\" by default.\n        \"\"\"\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[0],\n            self._src_decoder.text_tensor_name)\n        return name\n\n    @property\n    def source_length_name(self):\n        \"\"\"The name of the source length tensor, \"source_length\" by default.\n        \"\"\"\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[0],\n            self._src_decoder.length_tensor_name)\n        return name\n\n    @property\n    def source_text_id_name(self):\n        \"\"\"The name of the source text index tensor, \"source_text_ids\" by\n        default.\n        \"\"\"\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[0],\n            self._src_decoder.text_id_tensor_name)\n        return name\n\n    @property\n    def source_utterance_cnt_name(self):\n        \"\"\"The name of the source text utterance count tensor,\n        \"source_utterance_cnt\" by default.\n        \"\"\"\n        if not self._hparams.source_dataset.variable_utterance:\n            raise ValueError(\n                \"`utterance_cnt_name` of source data is undefined.\")\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[0],\n            self._src_decoder.utterance_cnt_tensor_name)\n        return name\n\n    @property\n    def target_text_name(self):\n        \"\"\"The name 
of the target text tensor, \"target_text\" bt default.\n        \"\"\"\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[1],\n            self._tgt_decoder.text_tensor_name)\n        return name\n\n    @property\n    def target_length_name(self):\n        \"\"\"The name of the target length tensor, \"target_length\" by default.\n        \"\"\"\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[1],\n            self._tgt_decoder.length_tensor_name)\n        return name\n\n    @property\n    def target_text_id_name(self):\n        \"\"\"The name of the target text index tensor, \"target_text_ids\" by\n        default.\n        \"\"\"\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[1],\n            self._tgt_decoder.text_id_tensor_name)\n        return name\n\n    @property\n    def target_utterance_cnt_name(self):\n        \"\"\"The name of the target text utterance count tensor,\n        \"target_utterance_cnt\" by default.\n        \"\"\"\n        if not self._hparams.target_dataset.variable_utterance:\n            raise ValueError(\n                \"`utterance_cnt_name` of target data is undefined.\")\n        name = dsutils._connect_name(\n            self._data_spec.name_prefix[1],\n            self._tgt_decoder.utterance_cnt_tensor_name)\n        return name\n\n    @property\n    def text_name(self):\n        \"\"\"The name of text tensor, \"text\" by default.\n        \"\"\"\n        return self._src_decoder.text_tensor_name\n\n    @property\n    def length_name(self):\n        \"\"\"The name of length tensor, \"length\" by default.\n        \"\"\"\n        return self._src_decoder.length_tensor_name\n\n    @property\n    def text_id_name(self):\n        \"\"\"The name of text index tensor, \"text_ids\" by default.\n        \"\"\"\n        return self._src_decoder.text_id_tensor_name\n\n    @property\n    def utterance_cnt_name(self):\n        \"\"\"The name of the text 
utterance count tensor, \"utterance_cnt\" by\n        default.\n        \"\"\"\n        if self._hparams.source_dataset.variable_utterance:\n            return self._src_decoder.utterance_cnt_tensor_name\n        if self._hparams.target_dataset.variable_utterance:\n            return self._tgt_decoder.utterance_cnt_tensor_name\n        raise ValueError(\"`utterance_cnt_name` is not defined.\")\n"
  },
  {
    "path": "texar_repo/texar/data/data/paired_text_data_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for data related operations.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tempfile\nimport copy\nimport numpy as np\n\nimport tensorflow as tf\n\nimport texar as tx\nfrom texar.data import SpecialTokens\n\n# pylint: disable=too-many-locals, too-many-branches, protected-access\n# pylint: disable=invalid-name\n\nclass PairedTextDataTest(tf.test.TestCase):\n    \"\"\"Tests paired text data class.\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n\n        # Create test data\n        vocab_list = ['This', 'is', 'a', 'word', '词']\n        vocab_file = tempfile.NamedTemporaryFile()\n        vocab_file.write('\\n'.join(vocab_list).encode(\"utf-8\"))\n        vocab_file.flush()\n        self._vocab_file = vocab_file\n        self._vocab_size = len(vocab_list)\n\n        src_text = ['This is a sentence from source .', '词 词 。 source']\n        src_text_file = tempfile.NamedTemporaryFile()\n        src_text_file.write('\\n'.join(src_text).encode(\"utf-8\"))\n        src_text_file.flush()\n        self._src_text_file = src_text_file\n\n        tgt_text = ['This is a sentence from target .', '词 词 。 target']\n        tgt_text_file = tempfile.NamedTemporaryFile()\n        tgt_text_file.write('\\n'.join(tgt_text).encode(\"utf-8\"))\n        tgt_text_file.flush()\n        self._tgt_text_file = tgt_text_file\n\n        self._hparams = {\n            \"num_epochs\": 50,\n            \"batch_size\": 3,\n            \"source_dataset\": {\n                \"files\": [self._src_text_file.name],\n                \"vocab_file\": self._vocab_file.name,\n            },\n            \"target_dataset\": {\n                \"files\": self._tgt_text_file.name,\n                \"vocab_share\": True,\n                \"eos_token\": \"<TARGET_EOS>\"\n            }\n        }\n\n    def 
_run_and_test(self, hparams, proc_shr=False, length_inc=None,\n                      discard_src=False):\n        # Construct database\n        text_data = tx.data.PairedTextData(hparams)\n        self.assertEqual(\n            text_data.source_vocab.size,\n            self._vocab_size + len(text_data.source_vocab.special_tokens))\n\n        iterator = text_data.dataset.make_initializable_iterator()\n        text_data_batch = iterator.get_next()\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            sess.run(tf.local_variables_initializer())\n            sess.run(tf.tables_initializer())\n            sess.run(iterator.initializer)\n\n            if proc_shr:\n                tgt_eos = b'<EOS>'\n            else:\n                tgt_eos = b'<TARGET_EOS>'\n\n            while True:\n                try:\n                    # Run the logics\n                    data_batch_ = sess.run(text_data_batch)\n                    self.assertEqual(set(data_batch_.keys()),\n                                     set(text_data.list_items()))\n                    # Test matching\n                    src_text = data_batch_['source_text']\n                    tgt_text = data_batch_['target_text']\n                    if proc_shr:\n                        for src, tgt in zip(src_text, tgt_text):\n                            np.testing.assert_array_equal(src[:3], tgt[:3])\n                    else:\n                        for src, tgt in zip(src_text, tgt_text):\n                            np.testing.assert_array_equal(src[:3], tgt[1:4])\n                    self.assertTrue(\n                        tgt_eos in data_batch_['target_text'][0])\n\n                    if length_inc:\n                        for i in range(len(data_batch_['source_text'])):\n                            text_ = data_batch_['source_text'][i].tolist()\n                            self.assertEqual(\n                                
text_.index(b'<EOS>') + 1,\n                                data_batch_['source_length'][i] - length_inc[0])\n                        for i in range(len(data_batch_['target_text'])):\n                            text_ = data_batch_['target_text'][i].tolist()\n                            self.assertEqual(\n                                text_.index(tgt_eos) + 1,\n                                data_batch_['target_length'][i] - length_inc[1])\n\n                    if discard_src:\n                        src_hparams = text_data.hparams.source_dataset\n                        max_l = src_hparams.max_seq_length\n                        max_l += text_data._decoder[0].added_length\n                        for l in data_batch_[text_data.source_length_name]:\n                            self.assertLessEqual(l, max_l)\n\n                except tf.errors.OutOfRangeError:\n                    print('Done -- epoch limit reached')\n                    break\n\n    def test_default_setting(self):\n        \"\"\"Tests the logics of the text data.\n        \"\"\"\n        self._run_and_test(self._hparams)\n\n    def test_shuffle(self):\n        \"\"\"Tests toggling shuffle.\n        \"\"\"\n        hparams = copy.copy(self._hparams)\n        hparams[\"shuffle\"] = False\n        self._run_and_test(hparams)\n\n    def test_processing_share(self):\n        \"\"\"Tests sharing processing.\n        \"\"\"\n        hparams = copy.copy(self._hparams)\n        hparams[\"target_dataset\"][\"processing_share\"] = True\n        self._run_and_test(hparams, proc_shr=True)\n\n    def test_other_transformations(self):\n        \"\"\"Tests use of other transformations\n        \"\"\"\n        def _transform(x, data_specs): # pylint: disable=invalid-name\n            x[data_specs.decoder.length_tensor_name] += 1\n            return x\n\n        hparams = copy.copy(self._hparams)\n        hparams[\"source_dataset\"].update(\n            {\"other_transformations\": [_transform, _transform]})\n   
     hparams[\"target_dataset\"].update(\n            {\"other_transformations\": [_transform]})\n        self._run_and_test(hparams, length_inc=(2, 1))\n\n    def test_length_filter(self):\n        \"\"\"Tests filtering by length.\n        \"\"\"\n        hparams = copy.copy(self._hparams)\n        hparams[\"source_dataset\"].update(\n            {\"max_seq_length\": 4,\n             \"length_filter_mode\": \"discard\"})\n        self._run_and_test(hparams, discard_src=True)\n\n    #def test_sequence_length(self):\n    #    hparams = {\n    #        \"batch_size\": 64,\n    #        \"num_epochs\": 1,\n    #        \"shuffle\": False,\n    #        \"allow_smaller_final_batch\": False,\n    #        \"source_dataset\": {\n    #            \"files\": \"../../../data/yelp/sentiment.dev.sort.0\",\n    #            \"vocab_file\": \"../../../data/yelp/vocab\",\n    #            \"bos_token\": SpecialTokens.BOS,\n    #            \"eos_token\": SpecialTokens.EOS,\n    #        },\n    #        \"target_dataset\": {\n    #            \"files\": \"../../../data/yelp/sentiment.dev.sort.1\",\n    #            \"vocab_share\": True,\n    #        },\n    #    }\n    #    data = tx.data.PairedTextData(hparams)\n\n    #    iterator = tx.data.TrainTestDataIterator(val=data)\n    #    text_data_batch = iterator.get_next()\n\n    #    with self.test_session() as sess:\n    #        sess.run(tf.global_variables_initializer())\n    #        sess.run(tf.local_variables_initializer())\n    #        sess.run(tf.tables_initializer())\n    #        iterator.switch_to_val_data(sess)\n\n    #        while True:\n    #            try:\n    #                data_batch_ = sess.run(text_data_batch)\n    #                src = data_batch_[\"source_text_ids\"]\n    #                src_len = data_batch_[\"source_length\"]\n    #                self.assertEqual(src.shape[1], np.max(src_len))\n    #                tgt = data_batch_[\"target_text_ids\"]\n    #                tgt_len = 
data_batch_[\"target_length\"]\n    #                self.assertEqual(tgt.shape[1], np.max(tgt_len))\n    #            except tf.errors.OutOfRangeError:\n    #                break\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/data/data/scalar_data.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious data classes that define data reading, parsing, batching, and other\npreprocessing operations.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nfrom texar.data.data_utils import count_file_lines\nfrom texar.data.data import dataset_utils as dsutils\nfrom texar.data.data.data_base import DataBase\nfrom texar.data.data.mono_text_data import MonoTextData\nfrom texar.data.data_decoders import ScalarDataDecoder\n\n# pylint: disable=invalid-name, arguments-differ, not-context-manager\n\n__all__ = [\n    \"_default_scalar_dataset_hparams\",\n    \"ScalarData\"\n]\n\ndef _default_scalar_dataset_hparams():\n    \"\"\"Returns hyperparameters of a scalar dataset with default values.\n\n    See :meth:`texar.data.ScalarData.default_hparams` for details.\n    \"\"\"\n    return {\n        \"files\": [],\n        \"compression_type\": None,\n        \"data_type\": \"int\",\n        \"data_name\": None,\n        \"other_transformations\": [],\n        \"@no_typecheck\": [\"files\"]\n    }\n\nclass ScalarData(DataBase):\n    \"\"\"Scalar data where each line of the files is a scalar (int or float),\n    e.g., a data label.\n\n    Args:\n        hparams (dict): Hyperparameters. 
See :meth:`default_hparams` for the\n            defaults.\n\n    The processor reads and processes raw data and results in a TF dataset\n    whose element is a python `dict` including one field. The field name is\n    specified in :attr:`hparams[\"dataset\"][\"data_name\"]`. If not specified,\n    the default name is `\"data\"`. The field name can be accessed through\n    :attr:`data_name`.\n\n    This field is a Tensor of shape `[batch_size]` containing a batch of\n    scalars, of either int or float type as specified in :attr:`hparams`.\n\n    Example:\n\n        .. code-block:: python\n\n            hparams={\n                'dataset': { 'files': 'data.txt', 'data_name': 'label' },\n                'batch_size': 2\n            }\n            data = ScalarData(hparams)\n            iterator = DataIterator(data)\n            batch = iterator.get_next()\n\n            iterator.switch_to_dataset(sess) # initializes the dataset\n            batch_ = sess.run(batch)\n            # batch_ == {\n            #     'label': [2, 9]\n            # }\n    \"\"\"\n\n    def __init__(self, hparams):\n        DataBase.__init__(self, hparams)\n        with tf.name_scope(self.name, self.default_hparams()[\"name\"]):\n            self._make_data()\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dicitionary of default hyperparameters.\n\n        .. 
code-block:: python\n\n            {\n                # (1) Hyperparams specific to scalar dataset\n                \"dataset\": {\n                    \"files\": [],\n                    \"compression_type\": None,\n                    \"data_type\": \"int\",\n                    \"other_transformations\": [],\n                    \"data_name\": None,\n                }\n                # (2) General hyperparams\n                \"num_epochs\": 1,\n                \"batch_size\": 64,\n                \"allow_smaller_final_batch\": True,\n                \"shuffle\": True,\n                \"shuffle_buffer_size\": None,\n                \"shard_and_shuffle\": False,\n                \"num_parallel_calls\": 1,\n                \"prefetch_buffer_size\": 0,\n                \"max_dataset_size\": -1,\n                \"seed\": None,\n                \"name\": \"scalar_data\",\n            }\n\n        Here:\n\n        1. For the hyperparameters in the :attr:`\"dataset\"` field:\n\n            \"files\" : str or list\n                A (list of) file path(s).\n\n                Each line contains a single scalar number.\n\n            \"compression_type\" : str, optional\n                One of \"\" (no compression), \"ZLIB\", or \"GZIP\".\n\n            \"data_type\" : str\n                The scalar type. Currently supports \"int\" and \"float\".\n\n            \"other_transformations\" : list\n                A list of transformation functions or function names/paths to\n                further transform each single data instance.\n\n                (More documentations to be added.)\n\n            \"data_name\" : str\n                Name of the dataset.\n\n        2. 
For the **general** hyperparameters, see\n        :meth:`texar.data.DataBase.default_hparams` for details.\n\n        \"\"\"\n        hparams = DataBase.default_hparams()\n        hparams[\"name\"] = \"scalar_data\"\n        hparams.update({\n            \"dataset\": _default_scalar_dataset_hparams()\n        })\n        return hparams\n\n    @staticmethod\n    def _get_dtype(dtype_hparam):\n        if dtype_hparam == \"int\":\n            dtype = tf.int32\n        elif dtype_hparam == \"float\":\n            dtype = tf.float32\n        else:\n            raise ValueError(\"Unknown data type: \" + dtype_hparam)\n        return dtype\n\n    @staticmethod\n    def _make_processor(dataset_hparams, data_spec, chained=True,\n                        name_prefix=None):\n        # Create data decoder\n        decoder = ScalarDataDecoder(\n            ScalarData._get_dtype(dataset_hparams[\"data_type\"]),\n            data_name=name_prefix)\n        # Create other transformations\n        data_spec.add_spec(decoder=decoder)\n        # pylint: disable=protected-access\n        other_trans = MonoTextData._make_other_transformations(\n            dataset_hparams[\"other_transformations\"], data_spec)\n\n        data_spec.add_spec(name_prefix=name_prefix)\n\n        if chained:\n            chained_tran = dsutils.make_chained_transformation(\n                [decoder] + other_trans)\n            return chained_tran, data_spec\n        else:\n            return decoder, other_trans, data_spec\n\n    def _process_dataset(self, dataset, hparams, data_spec):\n        chained_tran, data_spec = self._make_processor(\n            hparams[\"dataset\"], data_spec,\n            name_prefix=hparams[\"dataset\"][\"data_name\"])\n        num_parallel_calls = hparams[\"num_parallel_calls\"]\n        dataset = dataset.map(\n            lambda *args: chained_tran(dsutils.maybe_tuple(args)),\n            num_parallel_calls=num_parallel_calls)\n\n        # Truncates data count\n        dataset = 
dataset.take(hparams[\"max_dataset_size\"])\n\n        return dataset, data_spec\n\n    def _make_data(self):\n        dataset_hparams = self._hparams.dataset\n\n        # Create and shuffle dataset\n        dataset = MonoTextData._make_mono_text_dataset(dataset_hparams)\n        dataset, dataset_size = self._shuffle_dataset(\n            dataset, self._hparams, self._hparams.dataset.files)\n        self._dataset_size = dataset_size\n\n        # Processing\n        # pylint: disable=protected-access\n        data_spec = dsutils._DataSpec(dataset=dataset,\n                                      dataset_size=self._dataset_size)\n        dataset, data_spec = self._process_dataset(dataset, self._hparams,\n                                                   data_spec)\n        self._data_spec = data_spec\n        self._decoder = data_spec.decoder # pylint: disable=no-member\n\n        # Batching\n        dataset = self._make_batch(dataset, self._hparams)\n\n        # Prefetching\n        if self._hparams.prefetch_buffer_size > 0:\n            dataset = dataset.prefetch(self._hparams.prefetch_buffer_size)\n\n        self._dataset = dataset\n\n    def list_items(self):\n        \"\"\"Returns the list of item names that the data can produce.\n\n        Returns:\n            A list of strings.\n        \"\"\"\n        return list(self._dataset.output_types.keys())\n\n    @property\n    def dataset(self):\n        \"\"\"The dataset.\n        \"\"\"\n        return self._dataset\n\n    def dataset_size(self):\n        \"\"\"Returns the number of data instances in the dataset.\n\n        Note that this is the total data count in the raw files, before any\n        filtering and truncation.\n        \"\"\"\n        if not self._dataset_size:\n            # pylint: disable=attribute-defined-outside-init\n            self._dataset_size = count_file_lines(\n                self._hparams.dataset.files)\n        return self._dataset_size\n\n    @property\n    def data_name(self):\n     
   \"\"\"The name of the data tensor, \"data\" by default if not specified in\n        :attr:`hparams`.\n        \"\"\"\n        return self._decoder.data_tensor_name\n\n"
  },
  {
    "path": "texar_repo/texar/data/data/scalar_data_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for data related operations.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport copy\nimport tempfile\nimport numpy as np\n\nimport tensorflow as tf\n\nimport texar as tx\n\nclass ScalarDataTest(tf.test.TestCase):\n    \"\"\"Tests scalar data class.\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n\n        # Create test data\n        # pylint: disable=no-member\n        int_data = np.linspace(0, 100, num=101, dtype=np.int32).tolist()\n        int_data = [str(i) for i in int_data]\n        int_file = tempfile.NamedTemporaryFile()\n        int_file.write('\\n'.join(int_data).encode(\"utf-8\"))\n        int_file.flush()\n        self._int_file = int_file\n\n        self._int_hparams = {\n            \"num_epochs\": 1,\n            \"batch_size\": 1,\n            \"shuffle\": False,\n            \"dataset\": {\n                \"files\": self._int_file.name,\n                \"data_type\": \"int\",\n                \"data_name\": \"label\"\n            }\n        }\n\n        self._float_hparams = {\n            \"num_epochs\": 1,\n            \"batch_size\": 1,\n            \"shuffle\": False,\n            \"dataset\": {\n                \"files\": self._int_file.name,\n                \"data_type\": \"float\",\n                \"data_name\": \"feat\"\n            }\n        }\n\n\n    def _run_and_test(self, hparams):\n        # Construct database\n        scalar_data = tx.data.ScalarData(hparams)\n\n        self.assertEqual(scalar_data.list_items()[0],\n                         hparams[\"dataset\"][\"data_name\"])\n\n        iterator = scalar_data.dataset.make_initializable_iterator()\n        data_batch = iterator.get_next()\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            
sess.run(tf.local_variables_initializer())\n            sess.run(tf.tables_initializer())\n            sess.run(iterator.initializer)\n\n            i = 0\n            while True:\n                try:\n                    # Run the logics\n                    data_batch_ = sess.run(data_batch)\n                    self.assertEqual(set(data_batch_.keys()),\n                                     set(scalar_data.list_items()))\n                    value = data_batch_[scalar_data.data_name][0]\n                    self.assertEqual(i, value)\n                    i += 1\n                    # pylint: disable=no-member\n                    if hparams[\"dataset\"][\"data_type\"] == \"int\":\n                        self.assertTrue(isinstance(value, np.int32))\n                    else:\n                        self.assertTrue(isinstance(value, np.float32))\n                except tf.errors.OutOfRangeError:\n                    print('Done -- epoch limit reached')\n                    break\n\n    def test_default_setting(self):\n        \"\"\"Tests the logics of ScalarData.\n        \"\"\"\n        self._run_and_test(self._int_hparams)\n        self._run_and_test(self._float_hparams)\n\n    def test_shuffle(self):\n        \"\"\"Tests results of toggling shuffle.\n        \"\"\"\n        hparams = copy.copy(self._int_hparams)\n        hparams[\"batch_size\"] = 10\n        scalar_data = tx.data.ScalarData(hparams)\n        iterator = scalar_data.dataset.make_initializable_iterator()\n        data_batch = iterator.get_next()\n\n        hparams_sfl = copy.copy(hparams)\n        hparams_sfl[\"shuffle\"] = True\n        scalar_data_sfl = tx.data.ScalarData(hparams_sfl)\n        iterator_sfl = scalar_data_sfl.dataset.make_initializable_iterator()\n        data_batch_sfl = iterator_sfl.get_next()\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            sess.run(tf.local_variables_initializer())\n            
sess.run(tf.tables_initializer())\n            sess.run(iterator.initializer)\n            sess.run(iterator_sfl.initializer)\n\n            vals = []\n            vals_sfl = []\n            while True:\n                try:\n                    # Run the logics\n                    data_batch_, data_batch_sfl_ = sess.run([data_batch,\n                                                             data_batch_sfl])\n                    vals += data_batch_[scalar_data.data_name].tolist()\n                    vals_sfl += data_batch_sfl_[scalar_data.data_name].tolist()\n                except tf.errors.OutOfRangeError:\n                    print('Done -- epoch limit reached')\n                    break\n            self.assertEqual(len(vals), len(vals_sfl))\n            self.assertSetEqual(set(vals), set(vals_sfl))\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/data/data/text_data_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase text data class that is enherited by all text data classes.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nfrom texar.data.data.data_base import DataBase\nfrom texar.data.data import dataset_utils as dsutils\n\n# pylint: disable=protected-access, arguments-differ\n\n__all__ = [\n    \"TextDataBase\"\n]\n\nclass TextDataBase(DataBase): # pylint: disable=too-few-public-methods\n    \"\"\"Base class inheritted by all text data classes.\n    \"\"\"\n\n    def __init__(self, hparams):\n        DataBase.__init__(self, hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of default hyperparameters.\n\n        See the specific subclasses for the details.\n        \"\"\"\n        hparams = DataBase.default_hparams()\n        hparams.update({\n            \"bucket_boundaries\": [],\n            \"bucket_batch_sizes\": None,\n            \"bucket_length_fn\": None})\n        return hparams\n\n    @staticmethod\n    def _make_batch(dataset, hparams, element_length_func,\n                    padded_shapes=None, padding_values=None):\n        dataset = dataset.repeat(hparams.num_epochs)\n\n        batch_size = hparams[\"batch_size\"]\n        
bucket_boundaries = hparams[\"bucket_boundaries\"]\n        if padded_shapes is None:\n            padded_shapes = dataset.output_shapes\n\n        if len(bucket_boundaries) == 0:\n            if hparams[\"allow_smaller_final_batch\"]:\n                dataset = dataset.padded_batch(\n                    batch_size, padded_shapes, padding_values=padding_values)\n            else:\n                dataset = dataset.apply(\n                    tf.contrib.data.padded_batch_and_drop_remainder(\n                        batch_size, padded_shapes,\n                        padding_values=padding_values))\n        else:\n            bucket_batch_size = hparams[\"bucket_batch_sizes\"]\n            if bucket_batch_size is None:\n                bucket_batch_size = [batch_size] * (len(bucket_boundaries) + 1)\n            dataset = dataset.apply(tf.contrib.data.bucket_by_sequence_length(\n                element_length_func, bucket_boundaries, bucket_batch_size,\n                padded_shapes=padded_shapes, padding_values=padding_values))\n            if not hparams[\"allow_smaller_final_batch\"]:\n                if len(set(bucket_batch_size)) > 1:\n                    raise ValueError(\n                        \"Batch size of every bucket must be the same if \"\n                        \"smaller final batch is not allowed.\")\n                batch_size = bucket_batch_size[0]\n                filter_fn = dsutils._make_smaller_batch_filter_fn(batch_size)\n                dataset = dataset.filter(\n                    lambda *args: filter_fn(dsutils.maybe_tuple(args)))\n\n        return dataset\n\n"
  },
  {
    "path": "texar_repo/texar/data/data_decoders.py",
    "content": "# -*- coding: utf-8 -*-\n# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nHelper functions and classes for decoding text data which are used after\nreading raw text data.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nimport tensorflow as tf\nfrom tensorflow.contrib.slim.python.slim.data import data_decoder\n\nfrom texar.data.vocabulary import SpecialTokens\n\n# pylint: disable=too-many-instance-attributes, too-many-arguments,\n# pylint: disable=no-member, invalid-name\n\n__all__ = [\n    \"ScalarDataDecoder\",\n    \"TextDataDecoder\",\n    \"VarUttTextDataDecoder\"\n]\n\ndef _append_token(token):\n    return token is not None and token != \"\"\n\nclass ScalarDataDecoder(data_decoder.DataDecoder):\n    \"\"\"A data decoder that decodes a scalar, e.g., int label or float number.\n\n    The only operation is to cast the data into a specified data type.\n\n    Args:\n        dtype: A :tf_main:`tf DType <DType>` that data is cast into. 
Can be\n            `tf.int32` or `tf.float32`.\n        data_name (str): Name of the decoded data.\n    \"\"\"\n\n    def __init__(self, dtype=tf.int32, data_name=\"data\"):\n        self._dtype = dtype\n        self._data_name = data_name\n        if self._data_name is None:\n            self._data_name = \"data\"\n\n    def __call__(self, data):\n        outputs = self.decode(data, self.list_items())\n        return dict(zip(self.list_items(), outputs))\n\n    def decode(self, data, items):\n        \"\"\"Decodes the data to return the tensors specified by the list of\n        items.\n\n        Args:\n            data: The scalar data to decode.\n            items: A list of strings, each of which is the name of the resulting\n                tensors to retrieve.\n\n        Returns:\n            A list of tensors, each of which corresponds to each item.\n        \"\"\"\n        data = tf.reshape(data, shape=[])\n        if data.dtype is tf.string:\n            decoded_data = tf.string_to_number(data, out_type=self._dtype)\n        else:\n            decoded_data = tf.cast(data, self._dtype),\n        outputs = {\n            self._data_name: decoded_data\n        }\n        return [outputs[item] for item in items]\n\n    def list_items(self):\n        \"\"\"Returns the list of item names that the decoder can produce.\n\n        Returns:\n            A list of strings can be passed to :meth:`decode()`.\n        \"\"\"\n        return [self._data_name]\n\n    @property\n    def data_tensor_name(self):\n        \"\"\"The name of the data tensor.\n        \"\"\"\n        return self._data_name\n\nclass TextDataDecoder(data_decoder.DataDecoder):\n    \"\"\"A text data decoder that decodes raw text data.\n\n    Operations include splitting on word or character level, truncation,\n    inserting special tokens, mapping text units to indexes, etc.\n\n    Args:\n        split_level (str): The name of split level on which text sequence is\n            split. 
Either \"word\" or \"char\".\n        delimiter (str): The delimiter character used when splitting on word\n            level.\n        bos_token (str, optional): Special token added to the beginning of\n            sequences. If it is `None` (default) or an empty string, no\n            BOS token is added.\n        eos_token (str, optional): Special tokan added to the end of\n            sequences. If it is `None` (default) or an empty string, no EOS\n            token is added.\n        max_seq_length (int, optional): Maximum length of output sequences.\n            Tokens exceeding the maximum length will be truncated. The length\n            does not include any added bos_token and eos_token. If not\n            given, no truncation is performed.\n        token_to_id_map (optional): A\n            :class:`~tensorflow.contrib.lookup.HashTable` instance that maps\n            token strings to integer indexes. If not given, the decoder will\n            not decode text into indexes. :attr:`bos_token` and\n            :attr:`eos_token` (if given) should have entries in the\n            :attr:`token_to_id_map` (if given).\n        text_tensor_name (str): Name of the text tensor results. 
Used as a\n            key to retrieve the text tensor.\n        length_tensor_name (str): Name of the text length tensor results.\n        text_id_tensor_name (str): Name of the text index tensor results.\n    \"\"\"\n\n    def __init__(self,\n                 split_level=\"word\",\n                 delimiter=\" \",\n                 bos_token=None,\n                 eos_token=None,\n                 max_seq_length=None,\n                 token_to_id_map=None,\n                 text_tensor_name=\"text\",\n                 length_tensor_name=\"length\",\n                 text_id_tensor_name=\"text_ids\"):\n        self._split_level = split_level\n        self._delimiter = delimiter\n        self._bos_token = bos_token\n        self._eos_token = eos_token\n        self._max_seq_length = max_seq_length\n        self._token_to_id_map = token_to_id_map\n        self._text_tensor_name = text_tensor_name\n        self._text_id_tensor_name = text_id_tensor_name\n        self._length_tensor_name = length_tensor_name\n        self._added_length = 0\n\n    def __call__(self, data):\n        outputs = self.decode(data, self.list_items())\n        return dict(zip(self.list_items(), outputs))\n\n    def decode(self, data, items):\n        \"\"\"Decodes the data to return the tensors specified by the list of\n        items.\n\n        Args:\n            data: The text data to decode.\n            items: A list of strings, each of which is the name of the resulting\n                tensors to retrieve.\n\n        Returns:\n            A list of tensors, each of which corresponds to each item. 
If\n            `token_to_id_map` is not given when constructing the decoder,\n            returns `None` for the token index item.\n        \"\"\"\n        # Split\n        if self._split_level == \"word\":\n            tokens = tf.string_split([data], delimiter=self._delimiter).values\n        elif self._split_level == \"char\":\n            raise NotImplementedError\n        else:\n            raise ValueError(\"Unknown split level: %s\" % self._split_level)\n\n        # Truncate\n        if self._max_seq_length is not None:\n            tokens = tokens[:self._max_seq_length]\n\n        # Add BOS/EOS tokens\n        if _append_token(self._bos_token):\n            tokens = tf.concat([[self._bos_token], tokens], axis=0)\n            self._added_length += 1\n        if _append_token(self._eos_token):\n            tokens = tf.concat([tokens, [self._eos_token]], axis=0)\n            self._added_length += 1\n\n        # Map to index\n        token_ids = None\n        if self._token_to_id_map is not None:\n            token_ids = self._token_to_id_map.lookup(tokens)\n\n        outputs = {\n            self._text_tensor_name: tokens,\n            self._length_tensor_name: tf.size(tokens),\n            self._text_id_tensor_name: token_ids\n        }\n        return [outputs[item] for item in items]\n\n    def list_items(self):\n        \"\"\"Returns the list of item names that the decoder can produce.\n\n        Returns:\n            A list of strings can be passed to :meth:`decode()`.\n        \"\"\"\n        return [self._text_tensor_name,\n                self._length_tensor_name,\n                self._text_id_tensor_name]\n\n    @property\n    def text_tensor_name(self):\n        \"\"\"The name of text tensor.\n        \"\"\"\n        return self._text_tensor_name\n\n    @text_tensor_name.setter\n    def text_tensor_name(self, name):\n        self._text_tensor_name = name\n\n    @property\n    def length_tensor_name(self):\n        \"\"\"The name of length tensor.\n 
       \"\"\"\n        return self._length_tensor_name\n\n    @length_tensor_name.setter\n    def length_tensor_name(self, name):\n        self._length_tensor_name = name\n\n    @property\n    def text_id_tensor_name(self):\n        \"\"\"The name of text index tensor.\n        \"\"\"\n        return self._text_id_tensor_name\n\n    @text_id_tensor_name.setter\n    def text_id_tensor_name(self, name):\n        self._text_id_tensor_name = name\n\n    @property\n    def added_length(self):\n        \"\"\"The added text length due to appended bos and eos tokens.\n        \"\"\"\n        return self._added_length\n\nclass VarUttTextDataDecoder(data_decoder.DataDecoder):\n    \"\"\"A text data decoder that decodes raw text data. Each data is considered\n    to be multiple sentences concatenated by a delimiter.\n\n    Operations include splitting on word or character level, truncation,\n    inserting special tokens, mapping text units to indexes, etc.\n\n    Args:\n        split_level (str): The name of split level on which text sequence is\n            split. Either \"word\" or \"char\".\n        delimiter (str): The delimiter character used when splitting on word\n            level.\n        bos_token (str, optional): Special token added to the beginning of\n            sequences. If it is `None` (default) or an empty string, no\n            BOS token is added.\n        eos_token (str, optional): Special tokan added to the end of\n            sequences. If it is `None` (default) or an empty string, no EOS\n            token is added.\n        max_seq_length (int): Maximum length of each sequence.\n            Tokens exceed the maximum length will be truncated. Additional\n            padding will be done to ensure output sequence all reach this\n            number. 
The length does not include any added bos_token and eos_\n            token.\n        max_utterance_cnt (int): Maximum number of sequences.\n            Additional empty sentences will be added to\n            ensure the respective dimension of the output tensor has size\n            :attr:`max_utterance_cnt`. The output item named by\n            :meth:`utterance_cnt_tensor_name` contains the actual number of\n            utterance in the data.\n        token_to_id_map (optional): A\n            :class:`~tensorflow.contrib.lookup.HashTable` instance that maps\n            token strings to integer indexes. If not given, the decoder will\n            not decode text into indexes. :attr:`bos_token` and\n            :attr:`eos_token` (if given) should have entries in the\n            :attr:`token_to_id_map` (if given).\n        text_tensor_name (str): Name of the text tensor results. Used as a\n            key to retrieve the text tensor.\n        length_tensor_name (str): Name of the text length tensor results.\n        text_id_tensor_name (str): Name of the text index tensor results.\n    \"\"\"\n\n    def __init__(self,\n                 split_level=\"word\",\n                 delimiter=\" \",\n                 sentence_delimiter=\"|||\",\n                 bos_token=None,\n                 eos_token=None,\n                 max_seq_length=None,\n                 max_utterance_cnt=None,\n                 token_to_id_map=None,\n                 text_tensor_name=\"text\",\n                 length_tensor_name=\"length\",\n                 text_id_tensor_name=\"text_ids\",\n                 utterance_cnt_tensor_name=\"utterance_cnt\"):\n        self._split_level = split_level\n        self._delimiter = delimiter\n        self._bos_token = bos_token\n        self._eos_token = eos_token\n        self._max_seq_length = max_seq_length\n        self._token_to_id_map = token_to_id_map\n        self._text_tensor_name = text_tensor_name\n        self._text_id_tensor_name = 
text_id_tensor_name\n        self._length_tensor_name = length_tensor_name\n        self._utterance_cnt_tensor_name = utterance_cnt_tensor_name\n        self._sentence_delimiter = sentence_delimiter\n        self._max_utterance_cnt = max_utterance_cnt\n        self._added_length = 0\n\n    def __call__(self, data):\n        outputs = self.decode(data, self.list_items())\n        return dict(zip(self.list_items(), outputs))\n\n    def decode(self, data, items): # pylint: disable=too-many-locals\n        \"\"\"Decodes the data to return the tensors specified by the list of\n        items.\n\n        Args:\n            data: The text data to decode.\n            items: A list of strings, each of which is the name of the resulting\n                tensors to retrieve.\n\n        Returns:\n            A list of tensors, each of which corresponds to each item. If\n            `token_to_id_map` is not given when constructing the decoder,\n            returns `None` for the token index item.\n        \"\"\"\n\n        sentences = tf.string_split([data],\n                                    delimiter=self._sentence_delimiter).values\n\n        # Truncate utterances\n        if self._max_utterance_cnt:\n            sentences = sentences[:self._max_utterance_cnt]\n        utterance_cnt = tf.shape(sentences)[0]\n\n        # Get (max) sentence length\n        def _get_sent_length(s):\n            raw_length = tf.size(\n                tf.string_split([s], delimiter=self._delimiter).values)\n            if self._max_seq_length:\n                return tf.minimum(raw_length, self._max_seq_length)\n            else:\n                return raw_length\n\n        raw_sent_length = tf.map_fn(\n            _get_sent_length, sentences, dtype=tf.int32)\n        sent_length = self._max_seq_length\n        if not sent_length:\n            sent_length = tf.reduce_max(raw_sent_length)\n        if _append_token(self._eos_token):\n            raw_sent_length += 1\n            sent_length += 
1\n            self._added_length += 1\n        if _append_token(self._bos_token):\n            raw_sent_length += 1\n            sent_length += 1\n            self._added_length += 1\n\n        def _trunc_and_pad(s, pad_token, max_length):\n            if self._max_seq_length:\n                s = s[:self._max_seq_length]\n            if _append_token(self._bos_token):\n                s = np.append([self._bos_token], s)\n            if _append_token(self._eos_token):\n                s = np.append(s, [self._eos_token])\n            s = np.append(s, [pad_token]*(max_length-s.size))\n            return s\n\n        # Split each sentence to tokens, and pad them to a same length.\n        # This is necessary to treat all sentences as a single tensor.\n        split_sentences = tf.map_fn(\n            lambda s: tf.py_func(\n                _trunc_and_pad,\n                [\n                    tf.string_split([s], delimiter=self._delimiter).values,\n                    SpecialTokens.PAD,\n                    sent_length\n                ],\n                tf.string),\n            sentences, dtype=tf.string\n        )\n\n        split_sentences = tf.reshape(split_sentences,\n                                     [utterance_cnt, sent_length])\n\n        # Map to index\n        token_ids = None\n        if self._token_to_id_map is not None:\n            token_ids = self._token_to_id_map.lookup(split_sentences)\n\n        outputs = {\n            self._text_tensor_name: split_sentences,\n            self._length_tensor_name: raw_sent_length,\n            self._utterance_cnt_tensor_name: tf.shape(sentences)[0],\n            self._text_id_tensor_name: token_ids\n        }\n        return [outputs[item] for item in items]\n\n    def list_items(self):\n        \"\"\"Returns the list of item names that the decoder can produce.\n\n        Returns:\n            A list of strings can be passed to :meth:`decode()`.\n        \"\"\"\n        return [\n            
self._text_tensor_name,\n            self._length_tensor_name,\n            self._text_id_tensor_name,\n            self._utterance_cnt_tensor_name\n        ]\n\n    @property\n    def text_tensor_name(self):\n        \"\"\"The name of text tensor.\n        \"\"\"\n        return self._text_tensor_name\n\n    @text_tensor_name.setter\n    def text_tensor_name(self, name):\n        self._text_tensor_name = name\n\n    @property\n    def utterance_cnt_tensor_name(self):\n        \"\"\"The name of the utterance count tensor.\n        \"\"\"\n        return self._utterance_cnt_tensor_name\n\n    @property\n    def length_tensor_name(self):\n        \"\"\"The name of length tensor.\n        \"\"\"\n        return self._length_tensor_name\n\n    @length_tensor_name.setter\n    def length_tensor_name(self, name):\n        self._length_tensor_name = name\n\n    @property\n    def text_id_tensor_name(self):\n        \"\"\"The name of text index tensor.\n        \"\"\"\n        return self._text_id_tensor_name\n\n    @text_id_tensor_name.setter\n    def text_id_tensor_name(self, name):\n        self._text_id_tensor_name = name\n\n    @property\n    def added_length(self):\n        \"\"\"The added text length due to appended bos and eos tokens.\n        \"\"\"\n        return self._added_length\n"
  },
  {
    "path": "texar_repo/texar/data/data_utils.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious utilities specific to data processing.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport os\nimport sys\nimport tarfile\nimport zipfile\nimport collections\nimport numpy as np\nfrom six.moves import urllib\nimport requests\n\nimport tensorflow as tf\n\nfrom texar.utils import utils_io\n\n# pylint: disable=invalid-name, too-many-branches\n\n__all__ = [\n    \"maybe_download\",\n    \"read_words\",\n    \"make_vocab\",\n    \"count_file_lines\"\n]\n\nPy3 = sys.version_info[0] == 3\n\ndef maybe_download(urls, path, filenames=None, extract=False):\n    \"\"\"Downloads a set of files.\n\n    Args:\n        urls: A (list of) urls to download files.\n        path (str): The destination path to save the files.\n        filenames: A (list of) strings of the file names. If given,\n            must have the same length with :attr:`urls`. 
If `None`,\n            filenames are extracted from :attr:`urls`.\n        extract (bool): Whether to extract compressed files.\n\n    Returns:\n        A list of paths to the downloaded files.\n    \"\"\"\n    utils_io.maybe_create_dir(path)\n\n    if not isinstance(urls, (list, tuple)):\n        urls = [urls]\n    if filenames is not None:\n        if not isinstance(filenames, (list, tuple)):\n            filenames = [filenames]\n        if len(urls) != len(filenames):\n            raise ValueError(\n                '`filenames` must have the same number of elements as `urls`.')\n\n    result = []\n    for i, url in enumerate(urls):\n        if filenames is not None:\n            filename = filenames[i]\n        elif 'drive.google.com' in url:\n            filename = _extract_google_drive_file_id(url)\n        else:\n            filename = url.split('/')[-1]\n            # If downloading from GitHub, remove suffix ?raw=True\n            # from local filename\n            if filename.endswith(\"?raw=true\"):\n                filename = filename[:-9]\n\n        filepath = os.path.join(path, filename)\n        result.append(filepath)\n\n        if not tf.gfile.Exists(filepath):\n            if 'drive.google.com' in url:\n                filepath = _download_from_google_drive(url, filename, path)\n            else:\n                filepath = _download(url, filename, path)\n\n            if extract:\n                tf.logging.info('Extract %s', filepath)\n                if tarfile.is_tarfile(filepath):\n                    tarfile.open(filepath, 'r').extractall(path)\n                elif zipfile.is_zipfile(filepath):\n                    with zipfile.ZipFile(filepath) as zfile:\n                        zfile.extractall(path)\n                else:\n                    tf.logging.info(\"Unknown compression type. 
Only .tar.gz, \"\n                                    \".tar.bz2, .tar, and .zip are supported\")\n\n    return result\n\ndef _download(url, filename, path):\n    def _progress(count, block_size, total_size):\n        percent = float(count * block_size) / float(total_size) * 100.\n        # pylint: disable=cell-var-from-loop\n        sys.stdout.write('\\r>> Downloading %s %.1f%%' %\n                         (filename, percent))\n        sys.stdout.flush()\n\n    filepath = os.path.join(path, filename)\n    filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)\n    print()\n    statinfo = os.stat(filepath)\n    print('Successfully downloaded {} {} bytes.'.format(\n        filename, statinfo.st_size))\n\n    return filepath\n\ndef _extract_google_drive_file_id(url):\n    # id is between `/d/` and '/'\n    url_suffix = url[url.find('/d/')+3:]\n    file_id = url_suffix[:url_suffix.find('/')]\n    return file_id\n\ndef _download_from_google_drive(url, filename, path):\n    \"\"\"Adapted from `https://github.com/saurabhshri/gdrive-downloader`\n    \"\"\"\n    def _get_confirm_token(response):\n        for key, value in response.cookies.items():\n            if key.startswith('download_warning'):\n                return value\n        return None\n\n    file_id = _extract_google_drive_file_id(url)\n\n    gurl = \"https://docs.google.com/uc?export=download\"\n    sess = requests.Session()\n    response = sess.get(gurl, params={'id': file_id}, stream=True)\n    token = _get_confirm_token(response)\n\n    if token:\n        params = {'id': file_id, 'confirm': token}\n        response = sess.get(gurl, params=params, stream=True)\n\n    filepath = os.path.join(path, filename)\n    CHUNK_SIZE = 32768\n    with tf.gfile.GFile(filepath, \"wb\") as f:\n        for chunk in response.iter_content(CHUNK_SIZE):\n            if chunk:\n                f.write(chunk)\n\n    print('Successfully downloaded {}.'.format(filename))\n\n    return filepath\n\ndef 
read_words(filename, newline_token=None):\n    \"\"\"Reads word from a file.\n\n    Args:\n        filename (str): Path to the file.\n        newline_token (str, optional): The token to replace the original newline\n            token \"\\\\\\\\n\". For example,\n            `newline_token=tx.data.SpecialTokens.EOS`.\n            If `None`, no replacement is performed.\n\n    Returns:\n        A list of words.\n    \"\"\"\n    with tf.gfile.GFile(filename, \"r\") as f:\n        if Py3:\n            if newline_token is None:\n                return f.read().split()\n            else:\n                return f.read().replace(\"\\n\", newline_token).split()\n        else:\n            if newline_token is None:\n                return f.read().decode(\"utf-8\").split()\n            else:\n                return (f.read().decode(\"utf-8\")\n                        .replace(\"\\n\", newline_token).split())\n\n\ndef make_vocab(filenames, max_vocab_size=-1, newline_token=None,\n               return_type=\"list\", return_count=False):\n    \"\"\"Builds vocab of the files.\n\n    Args:\n        filenames (str): A (list of) files.\n        max_vocab_size (int): Maximum size of the vocabulary. Low frequency\n            words that exceeding the limit will be discarded.\n            Set to `-1` (default) if no truncation is wanted.\n        newline_token (str, optional): The token to replace the original newline\n            token \"\\\\\\\\n\". For example,\n            `newline_token=tx.data.SpecialTokens.EOS`.\n            If `None`, no replacement is performed.\n        return_type (str): Either \"list\" or \"dict\". If \"list\" (default), this\n            function returns a list of words sorted by frequency. If \"dict\",\n            this function returns a dict mapping words to their index sorted\n            by frequency.\n        return_count (bool): Whether to return word counts. 
If `True` and\n            :attr:`return_type` is \"dict\", then a count dict is returned, which\n            is a mapping from words to their frequency.\n\n    Returns:\n        - If :attr:`return_count` is False, returns a list or dict containing \\\n        the vocabulary words.\n\n        - If :attr:`return_count` if True, returns a pair of list or dict \\\n        `(a, b)`, where `a` is a list or dict containing the vocabulary \\\n        words, `b` is a list of dict containing the word counts.\n    \"\"\"\n    if not isinstance(filenames, (list, tuple)):\n        filenames = [filenames]\n\n    words = []\n    for fn in filenames:\n        words += read_words(fn, newline_token=newline_token)\n\n    counter = collections.Counter(words)\n    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))\n\n    words, counts = list(zip(*count_pairs))\n    if max_vocab_size >= 0:\n        words = words[:max_vocab_size]\n        counts = counts[:max_vocab_size]\n\n    if return_type == \"list\":\n        if not return_count:\n            return words\n        else:\n            return words, counts\n    elif return_type == \"dict\":\n        word_to_id = dict(zip(words, range(len(words))))\n        if not return_count:\n            return word_to_id\n        else:\n            word_to_count = dict(zip(words, counts))\n            return word_to_id, word_to_count\n    else:\n        raise ValueError(\"Unknown return_type: {}\".format(return_type))\n\n\ndef count_file_lines(filenames):\n    \"\"\"Counts the number of lines in the file(s).\n    \"\"\"\n    def _count_lines(fn):\n        with open(fn, \"rb\") as f:\n            i = -1\n            for i, _ in enumerate(f):\n                pass\n            return i + 1\n\n    if not isinstance(filenames, (list, tuple)):\n        filenames = [filenames]\n    num_lines = np.sum([_count_lines(fn) for fn in filenames])\n    return num_lines\n\n"
  },
  {
    "path": "texar_repo/texar/data/data_utils_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for data utils.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tempfile\n\nimport tensorflow as tf\n\nfrom texar.data import data_utils\n\n\nclass CountFileLinesTest(tf.test.TestCase):\n    \"\"\"Tests :func:`texar.data.data_utils.count_file_lines`.\n    \"\"\"\n\n    def test_load_glove(self):\n        \"\"\"Tests the load_glove function.\n        \"\"\"\n        file_1 = tempfile.NamedTemporaryFile(mode=\"w+\")\n        num_lines = data_utils.count_file_lines(file_1.name)\n        self.assertEqual(num_lines, 0)\n\n        file_2 = tempfile.NamedTemporaryFile(mode=\"w+\")\n        file_2.write('\\n'.join(['x']*5))\n        file_2.flush()\n        num_lines = data_utils.count_file_lines(\n            [file_1.name, file_2.name, file_2.name])\n        self.assertEqual(num_lines, 0+5+5)\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n\n"
  },
  {
    "path": "texar_repo/texar/data/embedding.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nHelper functions and classes for embedding processing.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\nfrom tensorflow import gfile\nimport numpy as np\n\nfrom texar.utils import utils\nfrom texar.hyperparams import HParams\n\n__all__ = [\n    \"load_word2vec\",\n    \"load_glove\",\n    \"Embedding\"\n]\n\ndef load_word2vec(filename, vocab, word_vecs):\n    \"\"\"Loads embeddings in the word2vec binary format which has a header line\n    containing the number of vectors and their dimensionality (two integers),\n    followed with number-of-vectors lines each of which is formatted as\n    '<word-string> <embedding-vector>'.\n\n    Args:\n        filename (str): Path to the embedding file.\n        vocab (dict): A dictionary that maps token strings to integer index.\n            Tokens not in :attr:`vocab` are not read.\n        word_vecs: A 2D numpy array of shape `[vocab_size, embed_dim]`\n            which is updated as reading from the file.\n\n    Returns:\n        The updated :attr:`word_vecs`.\n    \"\"\"\n    with gfile.GFile(filename, \"rb\") as fin:\n        header = fin.readline()\n        vocab_size, vector_size = [int(s) for s in header.split()]\n        if vector_size != 
word_vecs.shape[1]:\n            raise ValueError(\"Inconsistent word vector sizes: %d vs %d\" %\n                             (vector_size, word_vecs.shape[1]))\n        binary_len = np.dtype('float32').itemsize * vector_size\n        for _ in np.arange(vocab_size):\n            chars = []\n            while True:\n                char = fin.read(1)\n                if char == b' ':\n                    break\n                if char != b'\\n':\n                    chars.append(char)\n            word = b''.join(chars)\n            word = tf.compat.as_text(word)\n            if word in vocab:\n                word_vecs[vocab[word]] = np.fromstring(\n                    fin.read(binary_len), dtype='float32')\n            else:\n                fin.read(binary_len)\n    return word_vecs\n\ndef load_glove(filename, vocab, word_vecs):\n    \"\"\"Loads embeddings in the glove text format in which each line is\n    '<word-string> <embedding-vector>'. Dimensions of the embedding vector\n    are separated with whitespace characters.\n\n    Args:\n        filename (str): Path to the embedding file.\n        vocab (dict): A dictionary that maps token strings to integer index.\n            Tokens not in :attr:`vocab` are not read.\n        word_vecs: A 2D numpy array of shape `[vocab_size, embed_dim]`\n            which is updated as reading from the file.\n\n    Returns:\n        The updated :attr:`word_vecs`.\n    \"\"\"\n    with gfile.GFile(filename) as fin:\n        for line in fin:\n            vec = line.strip().split()\n            if len(vec) == 0:\n                continue\n            word, vec = vec[0], vec[1:]\n            word = tf.compat.as_text(word)\n            if word not in vocab:\n                continue\n            if len(vec) != word_vecs.shape[1]:\n                raise ValueError(\"Inconsistent word vector sizes: %d vs %d\" %\n                                 (len(vec), word_vecs.shape[1]))\n            word_vecs[vocab[word]] = np.array([float(v) 
for v in vec])\n    return word_vecs\n\n\nclass Embedding(object):\n    \"\"\"Embedding class that loads token embedding vectors from file. Token\n    embeddings not in the embedding file are initialized as specified in\n    :attr:`hparams`.\n\n    Args:\n        vocab (dict): A dictionary that maps token strings to integer index.\n        read_fn: Callable that takes `(filename, vocab, word_vecs)` and\n            returns the updated `word_vecs`. E.g.,\n            :func:`~texar.data.embedding.load_word2vec` and\n            :func:`~texar.data.embedding.load_glove`.\n    \"\"\"\n    def __init__(self, vocab, hparams=None):\n        self._hparams = HParams(hparams, self.default_hparams())\n\n        # Initialize embeddings\n        init_fn_kwargs = self._hparams.init_fn.kwargs.todict()\n        if \"shape\" in init_fn_kwargs or \"size\" in init_fn_kwargs:\n            raise ValueError(\"Argument 'shape' or 'size' must not be \"\n                             \"specified. They are inferred automatically.\")\n        init_fn = utils.get_function(\n            self._hparams.init_fn.type,\n            [\"numpy.random\", \"numpy\", \"texar.custom\"])\n\n        try:\n            self._word_vecs = init_fn(size=[len(vocab), self._hparams.dim],\n                                      **init_fn_kwargs)\n        except TypeError:\n            self._word_vecs = init_fn(shape=[len(vocab), self._hparams.dim],\n                                      **init_fn_kwargs)\n\n        # Optionally read embeddings from file\n        if self._hparams.file is not None and self._hparams.file != \"\":\n            read_fn = utils.get_function(\n                self._hparams.read_fn,\n                [\"texar.data.embedding\", \"texar.data\", \"texar.custom\"])\n\n            self._word_vecs = \\\n                read_fn(self._hparams.file, vocab, self._word_vecs)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values:\n\n     
   .. role:: python(code)\n           :language: python\n\n        .. code-block:: python\n\n            {\n                \"file\": \"\",\n                \"dim\": 50,\n                \"read_fn\": \"load_word2vec\",\n                \"init_fn\": {\n                    \"type\": \"numpy.random.uniform\",\n                    \"kwargs\": {\n                        \"low\": -0.1,\n                        \"high\": 0.1,\n                    }\n                },\n            }\n\n        Here:\n\n        \"file\" : str\n            Path to the embedding file. If not provided, all embeddings are\n            initialized with the initialization function.\n\n        \"dim\": int\n            Dimension size of each embedding vector\n\n        \"read_fn\" : str or callable\n            Function to read the embedding file. This can be the function,\n            or its string name or full module path. E.g.,\n\n            .. code-block:: python\n\n                \"read_fn\": texar.data.load_word2vec\n                \"read_fn\": \"load_word2vec\"\n                \"read_fn\": \"texar.data.load_word2vec\"\n                \"read_fn\": \"my_module.my_read_fn\"\n\n            If function string name is used, the function must be in\n            one of the modules: :mod:`texar.data` or :mod:`texar.custom`.\n\n            The function must have the same signature as with\n            :func:`load_word2vec`.\n\n        \"init_fn\" : dict\n            Hyperparameters of the initialization function used to initialize\n            embedding of tokens missing in the embedding\n            file.\n\n            The function must accept argument named `size` or `shape` to\n            specify the output shape, and return a numpy array of the shape.\n\n            The `dict` has the following fields:\n\n                \"type\" : str or callable\n                    The initialization function. 
Can be either the function,\n                    or its string name or full module path.\n\n                \"kwargs\" : dict\n                    Keyword arguments for calling the function. The function\n                    is called with :python:`init_fn(size=[.., ..], **kwargs)`.\n        \"\"\"\n        return {\n            \"file\": \"\",\n            \"dim\": 50,\n            \"read_fn\": \"load_word2vec\",\n            \"init_fn\": {\n                \"type\": \"numpy.random.uniform\",\n                \"kwargs\": {\n                    \"low\": -0.1,\n                    \"high\": 0.1,\n                },\n            },\n            \"@no_typecheck\": [\"read_fn\", \"init_fn\"]\n        }\n\n    @property\n    def word_vecs(self):\n        \"\"\"2D numpy array of shape `[vocab_size, embedding_dim]`.\n        \"\"\"\n        return self._word_vecs\n\n    @property\n    def vector_size(self):\n        \"\"\"The embedding dimention size.\n        \"\"\"\n        return self._hparams.dim\n"
  },
  {
    "path": "texar_repo/texar/data/embedding_test.py",
    "content": "# -*- coding: utf-8 -*-\n# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nUnit tests for embedding related operations.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport sys\nimport tempfile\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.data import embedding\n\nPy3 = sys.version_info[0] == 3 # pylint: disable=invalid-name\n\nclass EmbeddingTest(tf.test.TestCase):\n    \"\"\"Tests embedding related operations.\n    \"\"\"\n\n    def test_load_glove(self):\n        \"\"\"Tests the load_glove function.\n        \"\"\"\n        word_vec_lines = [\"word 1.2 3.4 5.6\", \"词 1. 3. 
5.\"]\n        glove_file = tempfile.NamedTemporaryFile(mode=\"w+\")\n        if Py3:\n            glove_file.write('\\n'.join(word_vec_lines))\n        else:\n            glove_file.write('\\n'.join(word_vec_lines).encode(\"utf-8\"))\n        glove_file.flush()\n        vocab = {\"word\": 0, \"词\": 1}\n        word_vecs = np.zeros([2, 3])\n\n        word_vecs = embedding.load_glove(glove_file.name, vocab, word_vecs)\n\n        self.assertEqual(word_vecs.shape[0], 2)\n        self.assertEqual(word_vecs.shape[1], 3)\n        np.testing.assert_array_equal(word_vecs[0], [1.2, 3.4, 5.6])\n        np.testing.assert_array_equal(word_vecs[1], [1., 3., 5.])\n\n    def test_load_word2vec(self):\n        \"\"\"Tests the load_word2vec function.\n        \"\"\"\n        header = \"2 3\"\n        words = [\"word\", \"词\"]\n        vec = np.array([1.2, 3.4, 5.6], dtype='float32')\n        w2v_file = tempfile.NamedTemporaryFile()\n        w2v_file.write(tf.compat.as_bytes(header + \"\\n\"))\n        for word in words:\n            w2v_file.write(tf.compat.as_bytes(word + \" \"))\n            w2v_file.write(vec.tostring() + b'\\n')\n        w2v_file.flush()\n        vocab = {\"word\": 0, \"词\": 1}\n        word_vecs = np.zeros([2, 3])\n\n        word_vecs = embedding.load_word2vec(w2v_file.name, vocab, word_vecs)\n\n        self.assertEqual(word_vecs.shape[0], 2)\n        self.assertEqual(word_vecs.shape[1], 3)\n        np.testing.assert_array_equal(word_vecs[0], vec)\n        np.testing.assert_array_equal(word_vecs[1], vec)\n\n    def test_embedding(self):\n        \"\"\"Tests :class:`texar.data.embedding.Embedding`.\n        \"\"\"\n        vocab = {\"word\": 0, \"词\": 1}\n        emb = embedding.Embedding(vocab)\n        self.assertEqual(len(emb.word_vecs), len(vocab))\n\nif __name__ == \"__main__\":\n    tf.test.main()\n\n"
  },
  {
    "path": "texar_repo/texar/data/vocabulary.py",
    "content": "# -*- coding: utf-8 -*-\n# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nHelper functions and classes for vocabulary processing.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport warnings\nfrom collections import defaultdict\n\nimport tensorflow as tf\nfrom tensorflow import gfile\nimport numpy as np\n\nfrom texar.utils.utils import dict_lookup\n\n# pylint: disable=too-few-public-methods, invalid-name\n# pylint: disable=too-many-instance-attributes, too-many-arguments\n\n__all__ = [\n    \"SpecialTokens\",\n    \"Vocab\"\n]\n\nclass SpecialTokens(object):\n    \"\"\"Special tokens, including :attr:`PAD`, :attr:`BOS`, :attr:`EOS`,\n    :attr:`UNK`. These tokens will by default have token ids 0, 1, 2, 3,\n    respectively.\n    \"\"\"\n    PAD = \"<PAD>\"\n    BOS = \"<BOS>\"\n    EOS = \"<EOS>\"\n    UNK = \"<UNK>\"\n\n\ndef _make_defaultdict(keys, values, default_value):\n    \"\"\"Creates a python defaultdict.\n\n    Args:\n        keys (list): Keys of the dictionary.\n        values (list): Values correspond to keys. 
The two lists :attr:`keys` and\n            :attr:`values` must be of the same length.\n        default_value: default value returned when key is missing.\n\n    Returns:\n        defaultdict: A python `defaultdict` instance that maps keys to values.\n    \"\"\"\n    dict_ = defaultdict(lambda: default_value)\n    for k, v in zip(keys, values):\n        dict_[k] = v\n\n    return dict_\n\n\nclass Vocab(object):\n    \"\"\"Vocabulary class that loads vocabulary from file, and maintains mapping\n    tables between token strings and indexes.\n\n    Each line of the vocab file should contains one vocabulary token, e.g.,::\n\n        vocab_token_1\n        vocab token 2\n        vocab       token | 3 .\n        ...\n\n    Args:\n        filename (str): Path to the vocabulary file where each line contains\n            one token.\n        bos_token (str): A special token that will be added to the beginning of\n            sequences.\n        eos_token (str): A special token that will be added to the end of\n            sequences.\n        unk_token (str): A special token that will replace all unknown tokens\n            (tokens not included in the vocabulary).\n        pad_token (str): A special token that is used to do padding.\n    \"\"\"\n\n    def __init__(self,\n                 filename,\n                 pad_token=SpecialTokens.PAD,\n                 bos_token=SpecialTokens.BOS,\n                 eos_token=SpecialTokens.EOS,\n                 unk_token=SpecialTokens.UNK):\n        self._filename = filename\n        self._pad_token = pad_token\n        self._bos_token = bos_token\n        self._eos_token = eos_token\n        self._unk_token = unk_token\n\n        self._id_to_token_map, self._token_to_id_map, \\\n        self._id_to_token_map_py, self._token_to_id_map_py = \\\n            self.load(self._filename)\n\n    def load(self, filename):\n        \"\"\"Loads the vocabulary from the file.\n\n        Args:\n            filename (str): Path to the vocabulary 
file.\n\n        Returns:\n            A tuple of TF and python mapping tables between word string and\n            index, (:attr:`id_to_token_map`, :attr:`token_to_id_map`,\n            :attr:`id_to_token_map_py`, :attr:`token_to_id_map_py`), where\n            :attr:`id_to_token_map` and :attr:`token_to_id_map` are\n            TF :tf_main:`HashTable <contrib/lookup/HashTable>` instances,\n            and :attr:`id_to_token_map_py` and\n            :attr:`token_to_id_map_py` are python `defaultdict` instances.\n        \"\"\"\n        with gfile.GFile(filename) as vocab_file:\n            # Converts to 'unicode' (Python 2) or 'str' (Python 3)\n            vocab = list(tf.compat.as_text(line.strip()) for line in vocab_file)\n\n        warnings.simplefilter(\"ignore\", UnicodeWarning)\n\n        if self._bos_token in vocab:\n            raise ValueError(\"Special begin-of-seq token already exists in the \"\n                             \"vocabulary: '%s'\" % self._bos_token)\n        if self._eos_token in vocab:\n            raise ValueError(\"Special end-of-seq token already exists in the \"\n                             \"vocabulary: '%s'\" % self._eos_token)\n        if self._unk_token in vocab:\n            raise ValueError(\"Special UNK token already exists in the \"\n                             \"vocabulary: '%s'\" % self._unk_token)\n        if self._pad_token in vocab:\n            raise ValueError(\"Special padding token already exists in the \"\n                             \"vocabulary: '%s'\" % self._pad_token)\n\n        warnings.simplefilter(\"default\", UnicodeWarning)\n\n        # Places _pad_token at the beginning to make sure it take index 0.\n        vocab = [self._pad_token, self._bos_token, self._eos_token,\n                 self._unk_token] + vocab\n        # Must make sure this is consistent with the above line\n        unk_token_idx = 3\n        vocab_size = len(vocab)\n        vocab_idx = np.arange(vocab_size)\n\n        # Creates TF 
maps\n        id_to_token_map = tf.contrib.lookup.HashTable(\n            tf.contrib.lookup.KeyValueTensorInitializer(\n                vocab_idx, vocab, key_dtype=tf.int64, value_dtype=tf.string),\n            self._unk_token)\n\n        token_to_id_map = tf.contrib.lookup.HashTable(\n            tf.contrib.lookup.KeyValueTensorInitializer(\n                vocab, vocab_idx, key_dtype=tf.string, value_dtype=tf.int64),\n            unk_token_idx)\n\n        # Creates python maps to interface with python code\n        id_to_token_map_py = _make_defaultdict(\n            vocab_idx, vocab, self._unk_token)\n        token_to_id_map_py = _make_defaultdict(\n            vocab, vocab_idx, unk_token_idx)\n\n        return id_to_token_map, token_to_id_map, \\\n               id_to_token_map_py, token_to_id_map_py\n\n    def map_ids_to_tokens(self, ids):\n        \"\"\"Maps ids into text tokens.\n\n        The returned tokens are a Tensor.\n\n        Args:\n            ids: An `int` tensor of token ids.\n\n        Returns:\n            A tensor of text tokens of the same shape.\n        \"\"\"\n        return self.id_to_token_map.lookup(tf.to_int64(ids))\n\n    def map_tokens_to_ids(self, tokens):\n        \"\"\"Maps text tokens into ids.\n\n        The returned ids are a Tensor.\n\n        Args:\n            tokens: An tensor of text tokens.\n\n        Returns:\n            A tensor of token ids of the same shape.\n        \"\"\"\n        return self.token_to_id_map.lookup(tokens)\n\n    def map_ids_to_tokens_py(self, ids):\n        \"\"\"Maps ids into text tokens.\n\n        The input :attr:`ids` and returned tokens are both python\n        arrays or list.\n\n        Args:\n            ids: An `int` numpy arry or (possibly nested) list of token ids.\n\n        Returns:\n            A numpy array of text tokens of the same shape as :attr:`ids`.\n        \"\"\"\n        return dict_lookup(self.id_to_token_map_py, ids, self.unk_token)\n\n    def map_tokens_to_ids_py(self, 
tokens):\n        \"\"\"Maps text tokens into ids.\n\n        The input :attr:`tokens` and returned ids are both python\n        arrays or list.\n\n        Args:\n            tokens: A numpy array or (possibly nested) list of text tokens.\n\n        Returns:\n            A numpy array of token ids of the same shape as :attr:`tokens`.\n        \"\"\"\n        return dict_lookup(self.token_to_id_map_py, tokens, self.unk_token_id)\n\n    @property\n    def id_to_token_map(self):\n        \"\"\"The :tf_main:`HashTable <contrib/lookup/HashTable>`instance that\n        maps from token index to the string form.\n        \"\"\"\n        return self._id_to_token_map\n\n    @property\n    def token_to_id_map(self):\n        \"\"\"The :tf_main:`HashTable <contrib/lookup/HashTable>` instance\n        that maps from token string to the index.\n        \"\"\"\n        return self._token_to_id_map\n\n    @property\n    def id_to_token_map_py(self):\n        \"\"\"The python `defaultdict` instance that maps from token index to the\n        string form.\n        \"\"\"\n        return self._id_to_token_map_py\n\n    @property\n    def token_to_id_map_py(self):\n        \"\"\"The python `defaultdict` instance that maps from token string to the\n        index.\n        \"\"\"\n        return self._token_to_id_map_py\n\n    @property\n    def size(self):\n        \"\"\"The vocabulary size.\n        \"\"\"\n        return len(self.token_to_id_map_py)\n\n    @property\n    def bos_token(self):\n        \"\"\"A string of the special token indicating the beginning of sequence.\n        \"\"\"\n        return self._bos_token\n\n    @property\n    def bos_token_id(self):\n        \"\"\"The `int` index of the special token indicating the beginning\n        of sequence.\n        \"\"\"\n        return self.token_to_id_map_py[self._bos_token]\n\n    @property\n    def eos_token(self):\n        \"\"\"A string of the special token indicating the end of sequence.\n        \"\"\"\n        return 
self._eos_token\n\n    @property\n    def eos_token_id(self):\n        \"\"\"The `int` index of the special token indicating the end\n        of sequence.\n        \"\"\"\n        return self.token_to_id_map_py[self._eos_token]\n\n    @property\n    def unk_token(self):\n        \"\"\"A string of the special token indicating unknown token.\n        \"\"\"\n        return self._unk_token\n\n    @property\n    def unk_token_id(self):\n        \"\"\"The `int` index of the special token indicating unknown token.\n        \"\"\"\n        return self.token_to_id_map_py[self._unk_token]\n\n    @property\n    def pad_token(self):\n        \"\"\"A string of the special token indicating padding token. The\n        default padding token is an empty string.\n        \"\"\"\n        return self._pad_token\n\n    @property\n    def pad_token_id(self):\n        \"\"\"The `int` index of the special token indicating padding token.\n        \"\"\"\n        return self.token_to_id_map_py[self._pad_token]\n\n    @property\n    def special_tokens(self):\n        \"\"\"The list of special tokens\n        [:attr:`pad_token`, :attr:`bos_token`, :attr:`eos_token`,\n        :attr:`unk_token`].\n        \"\"\"\n        return [self._pad_token, self._bos_token, self._eos_token,\n                self._unk_token]\n"
  },
  {
    "path": "texar_repo/texar/data/vocabulary_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for vocabulary related operations.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tempfile\nimport tensorflow as tf\n\nfrom texar.data import vocabulary\n\n# pylint: disable=protected-access\n\nclass VocabularyTest(tf.test.TestCase):\n    \"\"\"Tests vocabulary related operations.\n    \"\"\"\n\n    def test_make_defaultdict(self):\n        \"\"\"Tests the _make_defaultdict function.\n        \"\"\"\n        keys = ['word', '词']\n        values = [0, 1]\n        default_value = -1\n\n        dict_ = vocabulary._make_defaultdict(keys, values, default_value)\n\n        self.assertEqual(len(dict_), 2)\n        self.assertEqual(dict_['word'], 0)\n        self.assertEqual(dict_['词'], 1)\n        self.assertEqual(dict_['sth_else'], -1)\n\n    def test_vocab_construction(self):\n        \"\"\"Test vocabulary construction.\n        \"\"\"\n        vocab_list = ['word', '词']\n        vocab_file = tempfile.NamedTemporaryFile()\n        vocab_file.write('\\n'.join(vocab_list).encode(\"utf-8\"))\n        vocab_file.flush()\n\n        vocab = vocabulary.Vocab(vocab_file.name)\n\n        self.assertEqual(vocab.size, len(vocab_list) + 4)\n        self.assertEqual(\n            set(vocab.token_to_id_map_py.keys()),\n            set(['word', '词'] + vocab.special_tokens))\n\n        # Tests UNK token\n        unk_token_id = vocab.token_to_id_map_py['new']\n        unk_token_text = vocab.id_to_token_map_py[unk_token_id]\n        self.assertEqual(unk_token_text, vocab.unk_token)\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n\n"
  },
  {
    "path": "texar_repo/texar/evals/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library evals.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.evals.bleu_moses import *\nfrom texar.evals.bleu import *\nfrom texar.evals.metrics import *\n"
  },
  {
    "path": "texar_repo/texar/evals/bleu.py",
    "content": "# Copyright 2017 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n# Modifications copyright (C) 2018 Texar\n# ==============================================================================\n\"\"\"\nPython implementation of BLEU and smoothed BLEU adapted from:\n    `https://github.com/tensorflow/nmt/blob/master/nmt/scripts/bleu.py`\n\nThis module provides a Python implementation of BLEU and smoothed BLEU.\nSmooth BLEU is computed following the method outlined in the paper:\n\n    (Lin et al. 2004) ORANGE: a method for evaluating automatic evaluation\n    metrics for maching translation.\n    Chin-Yew Lin, Franz Josef Och. 
COLING 2004.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\nfrom __future__ import unicode_literals\n\nimport collections\nimport math\n\nfrom texar.utils.dtypes import compat_as_text, is_str\n\n# pylint: disable=invalid-name, too-many-branches, too-many-locals\n# pylint: disable=too-many-arguments\n\n__all__ = [\n    \"sentence_bleu\",\n    \"corpus_bleu\"\n]\n\ndef _get_ngrams(segment, max_order):\n    \"\"\"Extracts all n-grams up to a given maximum order from an input segment.\n\n    Args:\n        segment: text segment from which n-grams will be extracted.\n        max_order: maximum length in tokens of the n-grams returned by this\n            methods.\n\n    Returns:\n        The Counter containing all n-grams upto max_order in segment\n        with a count of how many times each n-gram occurred.\n    \"\"\"\n    ngram_counts = collections.Counter()\n    for order in range(1, max_order + 1):\n        for i in range(0, len(segment) - order + 1):\n            ngram = tuple(segment[i:i+order])\n            ngram_counts[ngram] += 1\n    return ngram_counts\n\ndef _maybe_str_to_list(list_or_str):\n    if is_str(list_or_str):\n        return list_or_str.split()\n    return list_or_str\n\ndef _lowercase(str_list):\n    return [str_.lower() for str_ in str_list]\n\ndef sentence_bleu(references, hypothesis, max_order=4, lowercase=False,\n                  smooth=False, return_all=False):\n    \"\"\"Calculates BLEU score of a hypothesis sentence.\n\n    Args:\n        references: A list of reference for the hypothesis.\n            Each reference can be either a list of string tokens, or a string\n            containing tokenized tokens separated with whitespaces.\n            List can also be numpy array.\n        hypotheses: A hypothesis sentence.\n            Each hypothesis can be either a list of string tokens, or a\n            string containing tokenized tokens separated with 
whitespaces.\n            List can also be numpy array.\n        lowercase (bool): If `True`, lowercase reference and hypothesis tokens.\n        max_order (int): Maximum n-gram order to use when computing BLEU score.\n        smooth (bool): Whether or not to apply (Lin et al. 2004) smoothing.\n        return_all (bool): If `True`, returns BLEU and all n-gram precisions.\n\n    Returns:\n        If :attr:`return_all` is `False` (default), returns a float32\n        BLEU score.\n\n        If :attr:`return_all` is `True`, returns a list of float32 scores:\n        `[BLEU] + n-gram precisions`, which is of length :attr:`max_order`+1.\n    \"\"\"\n    return corpus_bleu(\n        [references], [hypothesis], max_order=max_order, lowercase=lowercase,\n        smooth=smooth, return_all=return_all)\n\ndef corpus_bleu(list_of_references, hypotheses, max_order=4, lowercase=False,\n                smooth=False, return_all=True):\n    \"\"\"Computes corpus-level BLEU score.\n\n    Args:\n        list_of_references: A list of lists of references for each hypothesis.\n            Each reference can be either a list of string tokens, or a string\n            containing tokenized tokens separated with whitespaces.\n            List can also be numpy array.\n        hypotheses: A list of hypothesis sentences.\n            Each hypothesis can be either a list of string tokens, or a\n            string containing tokenized tokens separated with whitespaces.\n            List can also be numpy array.\n        lowercase (bool): If `True`, lowercase reference and hypothesis tokens.\n        max_order (int): Maximum n-gram order to use when computing BLEU score.\n        smooth (bool): Whether or not to apply (Lin et al. 
2004) smoothing.\n        return_all (bool): If `True`, returns BLEU and all n-gram precisions.\n\n    Returns:\n        If :attr:`return_all` is `False` (default), returns a float32\n        BLEU score.\n\n        If :attr:`return_all` is `True`, returns a list of float32 scores:\n        `[BLEU] + n-gram precisions`, which is of length :attr:`max_order`+1.\n    \"\"\"\n    list_of_references = compat_as_text(list_of_references)\n    hypotheses = compat_as_text(hypotheses)\n\n    matches_by_order = [0] * max_order\n    possible_matches_by_order = [0] * max_order\n    reference_length = 0\n    hyperthsis_length = 0\n    for (references, hyperthsis) in zip(list_of_references, hypotheses):\n        reference_length += min(len(r) for r in references)\n        hyperthsis_length += len(hyperthsis)\n\n        merged_ref_ngram_counts = collections.Counter()\n        for reference in references:\n            reference = _maybe_str_to_list(reference)\n            if lowercase:\n                reference = _lowercase(reference)\n            merged_ref_ngram_counts |= _get_ngrams(reference, max_order)\n\n        hyperthsis = _maybe_str_to_list(hyperthsis)\n        if lowercase:\n            hyperthsis = _lowercase(hyperthsis)\n        hyperthsis_ngram_counts = _get_ngrams(hyperthsis, max_order)\n\n        overlap = hyperthsis_ngram_counts & merged_ref_ngram_counts\n        for ngram in overlap:\n            matches_by_order[len(ngram)-1] += overlap[ngram]\n        for order in range(1, max_order+1):\n            possible_matches = len(hyperthsis) - order + 1\n            if possible_matches > 0:\n                possible_matches_by_order[order-1] += possible_matches\n\n    precisions = [0] * max_order\n    for i in range(0, max_order):\n        if smooth:\n            precisions[i] = ((matches_by_order[i] + 1.) 
/\n                             (possible_matches_by_order[i] + 1.))\n        else:\n            if possible_matches_by_order[i] > 0:\n                precisions[i] = (float(matches_by_order[i]) /\n                                 possible_matches_by_order[i])\n            else:\n                precisions[i] = 0.0\n\n    if min(precisions) > 0:\n        p_log_sum = sum((1. / max_order) * math.log(p) for p in precisions)\n        geo_mean = math.exp(p_log_sum)\n    else:\n        geo_mean = 0\n\n    ratio = float(hyperthsis_length) / reference_length\n\n    if ratio > 1.0:\n        bp = 1.\n    else:\n        try:\n            bp = math.exp(1 - 1. / ratio)\n        except ZeroDivisionError:\n            bp = math.exp(1 - 1. / (ratio + 1e-8))\n\n    bleu = geo_mean * bp\n\n    if return_all:\n        return [bleu * 100] + [p * 100 for p in precisions]\n    else:\n        return bleu * 100\n"
  },
  {
    "path": "texar_repo/texar/evals/bleu_moses.py",
    "content": "# -*- coding: utf-8 -*-\n# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nThe BLEU metric.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\nfrom __future__ import unicode_literals\n\nimport os\nfrom io import open # pylint: disable=redefined-builtin\nimport shutil\nimport re\nimport subprocess\nimport tempfile\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.utils.dtypes import compat_as_text\n\n# pylint: disable=too-many-locals, no-member, redefined-variable-type\n\n__all__ = [\n    \"sentence_bleu_moses\",\n    \"corpus_bleu_moses\"\n]\n\ndef _maybe_list_to_str(list_or_str):\n    if isinstance(list_or_str, (tuple, list, np.ndarray)):\n        return ' '.join(list_or_str)\n    return list_or_str\n\ndef _parse_multi_bleu_ret(bleu_str, return_all=False):\n    bleu_score = re.search(r\"BLEU = (.+?),\", bleu_str).group(1)\n    bleu_score = np.float32(bleu_score)\n\n    if return_all:\n        bleus = re.search(r\", (.+?)/(.+?)/(.+?)/(.+?) 
\", bleu_str)\n        bleus = [bleus.group(group_idx) for group_idx in range(1, 5)]\n        bleus = [np.float32(b) for b in bleus]\n        bleu_score = [bleu_score] + bleus\n\n    return bleu_score\n\ndef sentence_bleu_moses(references, hypothesis, lowercase=False,\n                        return_all=False):\n    \"\"\"Calculates BLEU score of a hypothesis sentence using the\n    **MOSES multi-bleu.perl** script.\n\n    Args:\n        references: A list of reference for the hypothesis.\n            Each reference can be either a string, or a list of string tokens.\n            List can also be numpy array.\n        hypotheses: A hypothesis sentence.\n            The hypothesis can be either a string, or a list of string tokens.\n            List can also be numpy array.\n        lowercase (bool): If `True`, pass the \"-lc\" flag to the multi-bleu\n            script.\n        return_all (bool): If `True`, returns BLEU and all n-gram precisions.\n\n    Returns:\n        If :attr:`return_all` is `False` (default), returns a float32\n        BLEU score.\n\n        If :attr:`return_all` is `True`, returns a list of 5 float32 scores:\n        `[BLEU, 1-gram precision, ..., 4-gram precision]`.\n    \"\"\"\n    return corpus_bleu_moses(\n        [references], [hypothesis], lowercase=lowercase, return_all=return_all)\n\ndef corpus_bleu_moses(list_of_references, hypotheses, lowercase=False,\n                      return_all=False):\n    \"\"\"Calculates corpus-level BLEU score using the\n    **MOSES multi-bleu.perl** script.\n\n    Args:\n        list_of_references: A list of lists of references for each hypothesis.\n            Each reference can be either a string, or a list of string tokens.\n            List can also be numpy array.\n        hypotheses: A list of hypothesis sentences.\n            Each hyperthsis can be either a string, or a list of string tokens.\n            List can also be numpy array.\n        lowercase (bool): If `True`, pass the \"-lc\" flag 
to the multi-bleu\n            script.\n        return_all (bool): If `True`, returns BLEU and all n-gram precisions.\n\n    Returns:\n        If :attr:`return_all` is `False` (default), returns a float32\n        BLEU score.\n\n        If :attr:`return_all` is `True`, returns a list of 5 float32 scores:\n        `[BLEU, 1-gram precision, ..., 4-gram precision]`.\n    \"\"\"\n    list_of_references = compat_as_text(list_of_references)\n    hypotheses = compat_as_text(hypotheses)\n\n    if np.size(hypotheses) == 0:\n        return np.float32(0.)   # pylint: disable=no-member\n\n    # Get multi-bleu.perl\n    cur_dir = os.path.dirname(os.path.realpath(__file__))\n    multi_bleu_path = os.path.abspath(\n        os.path.join(cur_dir, \"..\", \"..\", \"bin\", \"utils\", \"multi-bleu.perl\"))\n\n    # Create a temporary folder containing hyperthesis and reference files\n    result_path = tempfile.mkdtemp()\n    # Create hyperthesis file\n    hfile_path = os.path.join(result_path, 'hyp')\n    hyps = [_maybe_list_to_str(h) for h in hypotheses]\n    with open(hfile_path, 'w', encoding='utf-8') as hfile:\n        text = \"\\n\".join(hyps)\n        hfile.write(text)\n        hfile.write(\"\\n\")\n    # Create reference files\n    max_nrefs = max([len(refs) for refs in list_of_references])\n    rfile_path = os.path.join(result_path, 'ref')\n    for rid in range(max_nrefs):\n        with open(rfile_path + '%d'%rid, 'w', encoding='utf-8') as rfile:\n            for refs in list_of_references:\n                if rid < len(refs):\n                    ref = _maybe_list_to_str(refs[rid])\n                    rfile.write(ref + \"\\n\")\n                else:\n                    rfile.write(\"\\n\")\n\n    # Calculate BLEU\n    multi_bleu_cmd = [multi_bleu_path]\n    if lowercase:\n        multi_bleu_cmd += [\"-lc\"]\n    multi_bleu_cmd += [rfile_path]\n    with open(hfile_path, \"r\") as hyp_input:\n        try:\n            multi_bleu_ret = subprocess.check_output(\n               
 multi_bleu_cmd, stdin=hyp_input, stderr=subprocess.STDOUT)\n            multi_bleu_ret = multi_bleu_ret.decode(\"utf-8\")\n            bleu_score = _parse_multi_bleu_ret(multi_bleu_ret, return_all)\n        except subprocess.CalledProcessError as error:\n            if error.output is not None:\n                tf.logging.warning(\n                    \"multi-bleu.perl returned non-zero exit code\")\n                tf.logging.warning(error.output)\n            if return_all:\n                bleu_score = [np.float32(0.0)] * 5\n            else:\n                bleu_score = np.float32(0.0)\n\n    shutil.rmtree(result_path)\n\n    return np.float32(bleu_score)\n"
  },
  {
    "path": "texar_repo/texar/evals/bleu_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for bleu.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.evals.bleu_moses import sentence_bleu_moses, corpus_bleu_moses\nfrom texar.evals.bleu import sentence_bleu, corpus_bleu\n\n# pylint: disable=too-many-locals, too-many-arguments\n\nclass BLEUTest(tf.test.TestCase):\n    \"\"\"Tests the bleu functions.\n    \"\"\"\n\n    def _test_sentence_bleu(self, references, hypothesis, lowercase,\n                            true_bleu):\n        bleu = sentence_bleu_moses(references=references,\n                                   hypothesis=hypothesis,\n                                   lowercase=lowercase)\n        self.assertAlmostEqual(bleu, true_bleu, places=2)\n\n        bleu = sentence_bleu(references=references,\n                             hypothesis=hypothesis,\n                             lowercase=lowercase)\n        self.assertAlmostEqual(bleu, true_bleu, places=0)\n\n    def test_sentence_strings(self):\n        \"\"\"Tests hypothesis as strings.\n        \"\"\"\n        hypothesis = \\\n            \"this is a test sentence to evaluate the good bleu score . 词\"\n        references = [\"this is a test sentence to evaluate the bleu score .\"]\n        self._test_sentence_bleu(\n            references, hypothesis, lowercase=False, true_bleu=67.03)\n\n    def test_sentence_list(self):\n        \"\"\"Tests hypothesis as a list of tokens.\n        \"\"\"\n        hypothesis = \\\n            \"this is a test sentence to evaluate the good bleu score . 
词\"\n        hypothesis = hypothesis.split()\n        references = [\"this is a test sentence to evaluate the bleu score .\"]\n        references = [references[0].split()]\n        self._test_sentence_bleu(\n            references, hypothesis, lowercase=False, true_bleu=67.03)\n\n    def test_sentence_multi_references(self):\n        \"\"\"Tests multiple references.\n        \"\"\"\n        hypothesis = \\\n            \"this is a test sentence to evaluate the good bleu score . 词\"\n        references = [\"this is a test sentence to evaluate the bleu score .\",\n                      \"this is a test sentence to evaluate the good score .\"]\n        self._test_sentence_bleu(\n            references, hypothesis, lowercase=False, true_bleu=76.12)\n\n    def test_sentence_numpy(self):\n        \"\"\"Tests with numpy format.\n        \"\"\"\n        hypothesis = \\\n            \"this is a test sentence to evaluate the good bleu score . 词\"\n        hypothesis = np.array(hypothesis.split())\n        references = [\"this is a test sentence to evaluate the bleu score .\",\n                      \"this is a test sentence to evaluate the good score .\"]\n        references = np.array([np.array(r.split()) for r in references])\n        self._test_sentence_bleu(\n            references, hypothesis, lowercase=False, true_bleu=76.12)\n\n\n    def _test_corpus_bleu(self, list_of_references, hypotheses, lowercase,\n                          return_all, true_bleu):\n        bleu = corpus_bleu_moses(list_of_references=list_of_references,\n                                 hypotheses=hypotheses,\n                                 lowercase=lowercase,\n                                 return_all=return_all)\n        if not return_all:\n            self.assertAlmostEqual(bleu, true_bleu, places=2)\n        else:\n            for ret, true in zip(bleu, true_bleu):\n                self.assertAlmostEqual(ret, true, places=2)\n\n\n        bleu = 
corpus_bleu(list_of_references=list_of_references,\n                           hypotheses=hypotheses,\n                           lowercase=lowercase,\n                           return_all=return_all)\n        if not return_all:\n            self.assertAlmostEqual(bleu, true_bleu, places=0)\n        else:\n            for ret, true in zip(bleu, true_bleu):\n                self.assertAlmostEqual(ret, true, places=0)\n\n\n    def test_corpus_strings(self):\n        \"\"\"Tests corpus level BLEU.\n        \"\"\"\n        hypotheses = [\n            \"this is a test sentence to evaluate the good bleu score . 词\",\n            \"i believe that that the script is 词 perfectly correct .\"\n        ]\n        list_of_references = [\n            [\"this is a test sentence to evaluate the bleu score .\",\n             \"this is a test sentence to evaluate the good score .\"],\n            [\"i believe that the script is perfectly correct .\".split()]\n        ]\n        self._test_corpus_bleu(list_of_references, hypotheses,\n                               False, False, 63.02)\n\n        self._test_corpus_bleu(list_of_references, hypotheses,\n                               False, True, [63.02, 87.5, 77.3, 60.0, 38.9])\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/evals/metrics.py",
    "content": "\"\"\"\nVarious metrics.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\n__all__ = [\n    \"accuracy\",\n    \"binary_clas_accuracy\"\n]\n\ndef accuracy(labels, preds):\n    \"\"\"Calculates the accuracy of predictions.\n\n    Args:\n        labels: The ground truth values. A Tensor of the same shape of\n            :attr:`preds`.\n        preds: A Tensor of any shape containing the predicted values.\n\n    Returns:\n        A float scalar Tensor containing the accuracy.\n    \"\"\"\n    labels = tf.cast(labels, preds.dtype)\n    return tf.reduce_mean(tf.to_float(tf.equal(preds, labels)))\n\ndef binary_clas_accuracy(pos_preds=None, neg_preds=None):\n    \"\"\"Calculates the accuracy of binary predictions.\n\n    Args:\n        pos_preds (optional): A Tensor of any shape containing the\n            predicted values on positive data (i.e., ground truth labels are\n            `1`).\n        neg_preds (optional): A Tensor of any shape containing the\n            predicted values on negative data (i.e., ground truth labels are\n            `0`).\n\n    Returns:\n        A float scalar Tensor containing the accuracy.\n    \"\"\"\n    pos_accu = accuracy(tf.ones_like(pos_preds), pos_preds)\n    neg_accu = accuracy(tf.zeros_like(neg_preds), neg_preds)\n    psize = tf.to_float(tf.size(pos_preds))\n    nsize = tf.to_float(tf.size(neg_preds))\n    accu = (pos_accu * psize + neg_accu * nsize) / (psize + nsize)\n    return accu\n"
  },
  {
    "path": "texar_repo/texar/hyperparams.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nHyperparameter manager\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport copy\nimport json\n\nfrom texar.utils.dtypes import is_callable\n\n__all__ = [\n    \"HParams\"\n]\n\ndef _type_name(value):\n    return type(value).__name__\n\nclass HParams(object):\n    \"\"\"A class that maintains hyperparameters for configing Texar modules.\n    The class has several useful features:\n\n    - **Auto-completion of missing values.** Users can specify only a subset of\\\n    hyperparameters they care about. Other hyperparameters will automatically\\\n    take the default values. The auto-completion performs **recursively** so \\\n    that hyperparameters taking `dict` values will also be auto-completed \\\n    **All Texar modules** provide a \\\n    :meth:`default_hparams` containing allowed hyperparameters and their \\\n    default values. For example\n\n        .. 
code-block:: python\n\n            ## Recursive auto-completion\n            default_hparams = {\"a\": 1, \"b\": {\"c\": 2, \"d\": 3}}\n            hparams = {\"b\": {\"c\": 22}}\n            hparams_ = HParams(hparams, default_hparams)\n            hparams_.todict() == {\"a\": 1, \"b\": {\"c\": 22, \"d\": 3}}\n                # \"a\" and \"d\" are auto-completed\n\n            ## All Texar modules have built-in `default_hparams`\n            hparams = {\"dropout_rate\": 0.1}\n            emb = tx.modules.WordEmbedder(hparams=hparams, ...)\n            emb.hparams.todict() == {\n                \"dropout_rate\": 0.1,  # provided value\n                \"dim\": 100            # default value\n                ...\n            }\n\n    - **Automatic typecheck.** For most hyperparameters, provided value must\\\n    have the same or compatible dtype with the default value. HParams does\\\n    necessary typecheck, and raises Error if improper dtype is provided.\\\n    Also, hyperparameters not listed in `default_hparams` are not allowed,\\\n    except for \"kwargs\" as detailed below.\n\n    - **Flexible dtype for specified hyperparameters.**  Some hyperparameters\\\n    may allow different dtypes of values.\n\n        - Hyperparameters named \"type\" are not typechecked.\\\n        For example, in :func:`~texar.core.get_rnn_cell`, hyperparameter \\\n        `\"type\"` can take value of an RNNCell class, its string name of module \\\n        path, or an RNNCell class instance. (String name or module path is \\\n        allowd so that users can specify the value in YAML config files.)\n\n        - For other hyperparameters, list them\\\n        in the \"@no_typecheck\" field in `default_hparams` to skip typecheck. 
\\\n        For example, in :func:`~texar.core.get_rnn_cell`, hyperparameter \\\n        \"*_keep_prob\" can be set to either a `float` or a `tf.placeholder`.\n\n    - **Special flexibility of keyword argument hyparameters.** \\\n    Hyperparameters named \"kwargs\" are used as keyword arguments for a class\\\n    constructor or a function call. Such hyperparameters take a `dict`, and \\\n    users can add arbitrary valid keyword arguments to the dict. For example:\n\n        .. code-block:: python\n\n            default_rnn_cell_hparams = {\n                \"type\": \"LSTMCell\",\n                \"kwargs\": {\"num_units\": 256}\n                # Other hyperparameters\n                ...\n            }\n            my_hparams = {\n                \"kwargs\" {\n                    \"num_units\": 123,\n                    \"forget_bias\": 0.0         # Other valid keyword arguments\n                    \"activation\": \"tf.nn.relu\" # for LSTMCell constructor\n                }\n            }\n            _ = HParams(my_hparams, default_rnn_cell_hparams)\n\n    - **Rich interfaces.** An HParams instance provides rich interfaces for\\\n    accessing, updating, or adding hyperparameters.\n\n        .. 
code-block:: python\n\n            hparams = HParams(my_hparams, default_hparams)\n            # Access\n            hparams.type == hparams[\"type\"]\n            # Update\n            hparams.type = \"GRUCell\"\n            hparams.kwargs = { \"num_units\": 100 }\n            hparams.kwargs.num_units == 100\n            # Add new\n            hparams.add_hparam(\"index\", 1)\n            hparams.index == 1\n\n            # Convert to `dict` (recursively)\n            type(hparams.todic()) == dict\n\n            # I/O\n            pickle.dump(hparams, \"hparams.dump\")\n            with open(\"hparams.dump\", 'rb') as f:\n                hparams_loaded = pickle.load(f)\n\n\n    Args:\n        hparams: A `dict` or an `HParams` instance containing hyperparameters.\n            If `None`, all hyperparameters are set to default values.\n        default_hparams (dict): Hyperparameters with default values. If `None`,\n            Hyperparameters are fully defined by :attr:`hparams`.\n        allow_new_hparam (bool): If `False` (default), :attr:`hparams` cannot\n            contain hyperparameters that are not included in\n            :attr:`default_hparams`, except for the case of :attr:`\"kwargs\"` as\n            above.\n    \"\"\"\n    # - The default hyperparameters in :attr:`\"kwargs\"` are used (for typecheck\\\n    # and complementing missing hyperparameters) only when :attr:`\"type\"` \\\n    # takes default value (i.e., missing in :attr:`hparams` or set to \\\n    # the same value with the default). 
In this case :attr:`kwargs` allows to \\\n    # contain new keys not included in :attr:`default_hparams[\"kwargs\"]`.\n    #\n    # - If :attr:`\"type\"` is set to an other \\\n    # value and :attr:`\"kwargs\"` is missing in :attr:`hparams`, \\\n    # :attr:`\"kwargs\"` is set to an empty dictionary.\n\n    def __init__(self, hparams, default_hparams, allow_new_hparam=False):\n        if isinstance(hparams, HParams):\n            hparams = hparams.todict()\n        if default_hparams is not None:\n            parsed_hparams = self._parse(\n                hparams, default_hparams, allow_new_hparam)\n        else:\n            parsed_hparams = self._parse(hparams, hparams)\n        super(HParams, self).__setattr__('_hparams', parsed_hparams)\n\n    @staticmethod\n    def _parse(hparams, # pylint: disable=too-many-branches, too-many-statements\n               default_hparams,\n               allow_new_hparam=False):\n        \"\"\"Parses hyperparameters.\n\n        Args:\n            hparams (dict): Hyperparameters. If `None`, all hyperparameters are\n                set to default values.\n            default_hparams (dict): Hyperparameters with default values.\n                If `None`,Hyperparameters are fully defined by :attr:`hparams`.\n            allow_new_hparam (bool): If `False` (default), :attr:`hparams`\n                cannot contain hyperparameters that are not included in\n                :attr:`default_hparams`, except the case of :attr:`\"kwargs\"`.\n\n        Return:\n            A dictionary of parsed hyperparameters. 
Returns `None` if both\n            :attr:`hparams` and :attr:`default_hparams` are `None`.\n\n        Raises:\n            ValueError: If :attr:`hparams` is not `None` and\n                :attr:`default_hparams` is `None`.\n            ValueError: If :attr:`default_hparams` contains \"kwargs\" not does\n                not contains \"type\".\n        \"\"\"\n        if hparams is None and default_hparams is None:\n            return None\n\n        if hparams is None:\n            return HParams._parse(default_hparams, default_hparams)\n\n        if default_hparams is None:\n            raise ValueError(\"`default_hparams` cannot be `None` if `hparams` \"\n                             \"is not `None`.\")\n        no_typecheck_names = default_hparams.get(\"@no_typecheck\", [])\n\n        if \"kwargs\" in default_hparams and \"type\" not in default_hparams:\n            raise ValueError(\"Ill-defined hyperparameter structure: 'kwargs' \"\n                             \"must accompany with 'type'.\")\n\n        parsed_hparams = copy.deepcopy(default_hparams)\n\n        # Parse recursively for params of type dictionary that are missing\n        # in `hparams`.\n        for name, value in default_hparams.items():\n            if name not in hparams and isinstance(value, dict):\n                if name == \"kwargs\" and \"type\" in hparams and \\\n                        hparams[\"type\"] != default_hparams[\"type\"]:\n                    # Set params named \"kwargs\" to empty dictionary if \"type\"\n                    # takes value other than default.\n                    parsed_hparams[name] = HParams({}, {})\n                else:\n                    parsed_hparams[name] = HParams(value, value)\n\n        # Parse hparams\n        for name, value in hparams.items():\n            if name not in default_hparams:\n                if allow_new_hparam:\n                    parsed_hparams[name] = HParams._parse_value(value, name)\n                    continue\n           
     else:\n                    raise ValueError(\n                        \"Unknown hyperparameter: %s. Only hyperparameters \"\n                        \"named 'kwargs' hyperparameters can contain new \"\n                        \"entries undefined in default hyperparameters.\" % name)\n\n            if value is None:\n                parsed_hparams[name] = \\\n                    HParams._parse_value(parsed_hparams[name])\n\n            default_value = default_hparams[name]\n            if default_value is None:\n                parsed_hparams[name] = HParams._parse_value(value)\n                continue\n\n            # Parse recursively for params of type dictionary.\n            if isinstance(value, dict):\n                if name not in no_typecheck_names \\\n                        and not isinstance(default_value, dict):\n                    raise ValueError(\n                        \"Hyperparameter '%s' must have type %s, got %s\" %\n                        (name, _type_name(default_value), _type_name(value)))\n                if name == \"kwargs\":\n                    if \"type\" in hparams and \\\n                            hparams[\"type\"] != default_hparams[\"type\"]:\n                        # Leave \"kwargs\" as-is if \"type\" takes value\n                        # other than default.\n                        parsed_hparams[name] = HParams(value, value)\n                    else:\n                        # Allow new hyperparameters if \"type\" takes default\n                        # value\n                        parsed_hparams[name] = HParams(\n                            value, default_value, allow_new_hparam=True)\n                elif name in no_typecheck_names:\n                    parsed_hparams[name] = HParams(value, value)\n                else:\n                    parsed_hparams[name] = HParams(\n                        value, default_value, allow_new_hparam)\n                continue\n\n            # Do not type-check hyperparameter 
named \"type\" and accompanied\n            # with \"kwargs\"\n            if name == \"type\" and \"kwargs\" in default_hparams:\n                parsed_hparams[name] = value\n                continue\n\n            if name in no_typecheck_names:\n                parsed_hparams[name] = value\n            elif isinstance(value, type(default_value)):\n                parsed_hparams[name] = value\n            elif is_callable(value) and is_callable(default_value):\n                parsed_hparams[name] = value\n            else:\n                try:\n                    parsed_hparams[name] = type(default_value)(value)\n                except TypeError:\n                    raise ValueError(\n                        \"Hyperparameter '%s' must have type %s, got %s\" %\n                        (name, _type_name(default_value), _type_name(value)))\n\n        return parsed_hparams\n\n    @staticmethod\n    def _parse_value(value, name=None):\n        if isinstance(value, dict) and (name is None or name != \"kwargs\"):\n            return HParams(value, None)\n        else:\n            return value\n\n    def __getattr__(self, name):\n        \"\"\"Retrieves the value of the hyperparameter.\n        \"\"\"\n        if name == '_hparams':\n            return super(HParams, self).__getattribute__('_hparams')\n        if name not in self._hparams:\n            # Raise AttributeError to allow copy.deepcopy, etc\n            raise AttributeError(\"Unknown hyperparameter: %s\" % name)\n        return self._hparams[name]\n\n    def __getitem__(self, name):\n        \"\"\"Retrieves the value of the hyperparameter.\n        \"\"\"\n        return self.__getattr__(name)\n\n    def __setattr__(self, name, value):\n        \"\"\"Sets the value of the hyperparameter.\n        \"\"\"\n        if name not in self._hparams:\n            raise ValueError(\n                \"Unknown hyperparameter: %s. 
Only the `kwargs` \"\n                \"hyperparameters can contain new entries undefined \"\n                \"in default hyperparameters.\" % name)\n        self._hparams[name] = self._parse_value(value, name)\n\n    def items(self):\n        \"\"\"Returns the list of hyperparam `(name, value)` pairs\n        \"\"\"\n        return iter(self)\n\n    def keys(self):\n        \"\"\"Returns the list of hyperparam names\n        \"\"\"\n        return self._hparams.keys()\n\n    def __iter__(self):\n        for name, value in self._hparams.items():\n            yield name, value\n\n    def __len__(self):\n        return len(self._hparams)\n\n    def __contains__(self, name):\n        return name in self._hparams\n\n    def __str__(self):\n        \"\"\"Return a string of the hparams.\n        \"\"\"\n        hparams_dict = self.todict()\n        return json.dumps(hparams_dict, sort_keys=True, indent=2)\n\n    def get(self, name, default=None):\n        \"\"\"Returns the hyperparameter value for the given name. If name is not\n        available then returns :attr:`default`.\n\n        Args:\n            name (str): the name of hyperparameter.\n            default: the value to be returned in case name does not exist.\n        \"\"\"\n        try:\n            return self.__getattr__(name)\n        except AttributeError:\n            return default\n\n    def add_hparam(self, name, value):\n        \"\"\"Adds a new hyperparameter.\n        \"\"\"\n        if (name in self._hparams) or hasattr(self, name):\n            raise ValueError(\"Hyperparameter name already exists: %s\" % name)\n        self._hparams[name] = self._parse_value(value, name)\n\n    def todict(self):\n        \"\"\"Returns a copy of hyperparameters as a dictionary.\n        \"\"\"\n        dict_ = copy.deepcopy(self._hparams)\n        for name, value in self._hparams.items():\n            if isinstance(value, HParams):\n                dict_[name] = value.todict()\n        return dict_\n\n"
  },
  {
    "path": "texar_repo/texar/hyperparams_test.py",
    "content": "\"\"\"\nUnit tests of :class:`HParams`.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport copy\nimport pickle\n\nimport tempfile\nimport tensorflow as tf\n\nfrom texar.hyperparams import HParams\n\n# pylint: disable=no-member\n\nclass HParamsTest(tf.test.TestCase):\n    \"\"\"Tests hyperparameter related operations.\n    \"\"\"\n\n    def test_hparams(self):\n        \"\"\"Tests the HParams class.\n        \"\"\"\n        default_hparams = {\n            \"str\": \"str\",\n            \"list\": ['item1', 'item2'],\n            \"dict\": {\n                \"key1\": \"value1\",\n                \"key2\": \"value2\"\n            },\n            \"nested_dict\": {\n                \"dict_l2\": {\n                    \"key1_l2\": \"value1_l2\"\n                }\n            },\n            \"type\": \"type\",\n            \"kwargs\": {\n                \"arg1\": \"argv1\"\n            },\n        }\n\n        # Test HParams.items() function\n        hparams_ = HParams(None, default_hparams)\n        names = []\n        for name, _ in hparams_.items():\n            names.append(name)\n        self.assertEqual(set(names), set(default_hparams.keys()))\n\n        hparams = {\n            \"dict\": {\"key1\": \"new_value\"},\n            \"kwargs\": {\"arg2\": \"argv2\"}\n        }\n\n        hparams_ = HParams(hparams, default_hparams)\n\n        # Test HParams construction\n        self.assertEqual(hparams_.str, default_hparams[\"str\"])\n        self.assertEqual(hparams_.list, default_hparams[\"list\"])\n        self.assertEqual(hparams_.dict.key1, hparams[\"dict\"][\"key1\"])\n        self.assertEqual(hparams_.kwargs.arg2, hparams[\"kwargs\"][\"arg2\"])\n        self.assertEqual(hparams_.nested_dict.dict_l2.key1_l2,\n                         default_hparams[\"nested_dict\"][\"dict_l2\"][\"key1_l2\"])\n\n        self.assertEqual(len(hparams_), len(default_hparams))\n\n      
  new_hparams = copy.deepcopy(default_hparams)\n        new_hparams[\"dict\"][\"key1\"] = hparams[\"dict\"][\"key1\"]\n        new_hparams[\"kwargs\"].update(hparams[\"kwargs\"])\n        self.assertEqual(hparams_.todict(), new_hparams)\n\n        self.assertTrue(\"dict\" in hparams_)\n\n        self.assertIsNone(hparams_.get('not_existed_name', None))\n        self.assertEqual(hparams_.get('str'), default_hparams['str'])\n\n        # Test HParams update related operations\n        hparams_.str = \"new_str\"\n        hparams_.dict = {\"key3\": \"value3\"}\n        self.assertEqual(hparams_.str, \"new_str\")\n        self.assertEqual(hparams_.dict.key3, \"value3\")\n\n        hparams_.add_hparam(\"added_str\", \"added_str\")\n        hparams_.add_hparam(\"added_dict\", {\"key4\": \"value4\"})\n        hparams_.kwargs.add_hparam(\"added_arg\", \"added_argv\")\n        self.assertEqual(hparams_.added_str, \"added_str\")\n        self.assertEqual(hparams_.added_dict.todict(), {\"key4\": \"value4\"})\n        self.assertEqual(hparams_.kwargs.added_arg, \"added_argv\")\n\n        # Test HParams I/O\n        hparams_file = tempfile.NamedTemporaryFile()\n        pickle.dump(hparams_, hparams_file)\n        with open(hparams_file.name, 'rb') as hparams_file:\n            hparams_loaded = pickle.load(hparams_file)\n        self.assertEqual(hparams_loaded.todict(), hparams_.todict())\n\n\n    def test_typecheck(self):\n        \"\"\"Tests type-check functionality.\n        \"\"\"\n        def _foo():\n            pass\n        def _bar():\n            pass\n\n        default_hparams = {\n            \"fn\": _foo,\n            \"fn_2\": _foo\n        }\n        hparams = {\n            \"fn\": _foo,\n            \"fn_2\": _bar\n        }\n        hparams_ = HParams(hparams, default_hparams)\n        self.assertEqual(hparams_.fn, default_hparams[\"fn\"])\n\n\n    def test_type_kwargs(self):\n        \"\"\"The the special cases involving \"type\" and \"kwargs\"\n        
hyperparameters.\n        \"\"\"\n        default_hparams = {\n            \"type\": \"type_name\",\n            \"kwargs\": {\n                \"arg1\": \"argv1\"\n            }\n        }\n\n        hparams = {\n            \"type\": \"type_name\"\n        }\n        hparams_ = HParams(hparams, default_hparams)\n        self.assertEqual(hparams_.kwargs.todict(), default_hparams[\"kwargs\"])\n\n        hparams = {\n            \"type\": \"type_name\",\n            \"kwargs\": {\n                \"arg2\": \"argv2\"\n            }\n        }\n        hparams_ = HParams(hparams, default_hparams)\n        full_kwargs = {}\n        full_kwargs.update(default_hparams[\"kwargs\"])\n        full_kwargs.update(hparams[\"kwargs\"])\n        self.assertEqual(hparams_.kwargs.todict(), full_kwargs)\n\n        hparams = {\n            \"kwargs\": {\n                \"arg2\": \"argv2\"\n            }\n        }\n        hparams_ = HParams(hparams, default_hparams)\n        self.assertEqual(hparams_.kwargs.todict(), full_kwargs)\n\n        hparams = {\n            \"type\": \"type_name2\"\n        }\n        hparams_ = HParams(hparams, default_hparams)\n        self.assertEqual(hparams_.kwargs.todict(), {})\n\n        hparams = {\n            \"type\": \"type_name2\",\n            \"kwargs\": {\n                \"arg3\": \"argv3\"\n            }\n        }\n        hparams_ = HParams(hparams, default_hparams)\n        self.assertEqual(hparams_.kwargs.todict(), hparams[\"kwargs\"])\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/losses/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar losses.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.losses.losses_utils import *\nfrom texar.losses.mle_losses import *\nfrom texar.losses.pg_losses import *\nfrom texar.losses.adv_losses import *\nfrom texar.losses.rewards import *\nfrom texar.losses.entropy import *\n"
  },
  {
    "path": "texar_repo/texar/losses/adv_losses.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nAdversarial losses.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\ndef binary_adversarial_losses(real_data,\n                              fake_data,\n                              discriminator_fn,\n                              mode=\"max_real\"):\n    \"\"\"Computes adversarial losses of real/fake binary discrimination game.\n\n    .. role:: python(code)\n       :language: python\n\n    Args:\n        real_data (Tensor or array): Real data of shape\n            `[num_real_examples, ...]`.\n        fake_data (Tensor or array): Fake data of shape\n            `[num_fake_examples, ...]`. `num_real_examples` does not\n            necessarily equal `num_fake_examples`.\n        discriminator_fn: A callable takes data (e.g., :attr:`real_data` and\n            :attr:`fake_data`) and returns the logits of being real. The\n            signature of `discriminator_fn` must be:\n            :python:`logits, ... = discriminator_fn(data)`.\n            The return value of `discriminator_fn` can be the logits, or\n            a tuple where the logits are the first element.\n\n        mode (str): Mode of the generator loss. 
Either \"max_real\" or \"min_fake\".\n\n            - **\"max_real\"** (default): minimizing the generator loss is to\\\n            maximize the probability of fake data being classified as real.\n\n            - **\"min_fake\"**: minimizing the generator loss is to minimize the\\\n            probability of fake data being classified as fake.\n\n    Returns:\n        A tuple `(generator_loss, discriminator_loss)` each of which is\n        a scalar Tensor, loss to be minimized.\n    \"\"\"\n    real_logits = discriminator_fn(real_data)\n    if isinstance(real_logits, (list, tuple)):\n        real_logits = real_logits[0]\n    real_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(\n        logits=real_logits, labels=tf.ones_like(real_logits)))\n\n    fake_logits = discriminator_fn(fake_data)\n    if isinstance(fake_logits, (list, tuple)):\n        fake_logits = fake_logits[0]\n    fake_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(\n        logits=fake_logits, labels=tf.zeros_like(fake_logits)))\n\n    d_loss = real_loss + fake_loss\n\n    if mode == \"min_fake\":\n        g_loss = - fake_loss\n    elif mode == \"max_real\":\n        g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(\n            logits=fake_logits, labels=tf.ones_like(fake_logits)))\n    else:\n        raise ValueError(\"Unknown mode: %s. Only 'min_fake' and 'max_real' \"\n                         \"are allowed.\")\n\n    return g_loss, d_loss\n"
  },
  {
    "path": "texar_repo/texar/losses/adv_losses_test.py",
    "content": "#\n\"\"\"\nTests adversarial loss related functions.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nfrom texar.losses.adv_losses import binary_adversarial_losses\n\nclass AdvLossesTest(tf.test.TestCase):\n    \"\"\"Tests adversarial losses.\n    \"\"\"\n    def test_binary_adversarial_losses(self):\n        \"\"\"Tests :meth:`~texar.losses.adv_losses.binary_adversarial_losses`.\n        \"\"\"\n        batch_size = 16\n        data_dim = 64\n        real_data = tf.zeros([batch_size, data_dim], dtype=tf.float32)\n        fake_data = tf.ones([batch_size, data_dim], dtype=tf.float32)\n        const_logits = tf.zeros([batch_size], dtype=tf.float32)\n        # Use a dumb discriminator that always outputs logits=0.\n        gen_loss, disc_loss = binary_adversarial_losses(\n            real_data, fake_data, lambda x: const_logits)\n        gen_loss_2, disc_loss_2 = binary_adversarial_losses(\n            real_data, fake_data, lambda x: const_logits, mode=\"min_fake\")\n\n        with self.test_session() as sess:\n            gen_loss_, disc_loss_ = sess.run([gen_loss, disc_loss])\n            gen_loss_2_, disc_loss_2_ = sess.run([gen_loss_2, disc_loss_2])\n            self.assertAlmostEqual(gen_loss_, -gen_loss_2_)\n            self.assertAlmostEqual(disc_loss_, disc_loss_2_)\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/losses/entropy.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious entropies.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.losses.losses_utils import mask_and_reduce, reduce_dimensions\nfrom texar.utils.shapes import get_rank\n\n# pylint: disable=too-many-arguments\n\n__all__ = [\n    \"entropy_with_logits\",\n    \"sequence_entropy_with_logits\"\n]\n\ndef _get_entropy(logits):\n    probs = tf.nn.softmax(logits) + 1e-8\n    entropy = - probs * tf.log(probs)\n    entropy = tf.reduce_sum(entropy, -1)\n    return entropy\n\ndef entropy_with_logits(logits,\n                        rank=None,\n                        average_across_batch=True,\n                        average_across_remaining=False,\n                        sum_over_batch=False,\n                        sum_over_remaining=True):\n    \"\"\"Shannon entropy given logits.\n\n    Args:\n        logits: Unscaled log probabilities of shape\n            `[batch_size, d_2, ..., d_{rank-1}, distribution_dim]`\n            and of dtype `float32` or `float64`.\n\n            The rank of the tensor is optionally specified by the argument\n            :attr:`rank`.\n\n            The tensor is considered as having `[batch_size, .., d_{rank-1}]`\n            elements, each of which has a distribution of length `d_rank`\n       
     (i.e., `distribution_dim`). So the last dimension is always\n            summed out to compute the entropy.\n        rank (int, optional): The rank of :attr:`logits`.\n            If `None` (default), `rank` is inferred automatically from\n            `logits`. If the inference fails, `rank` is\n            set to 2, i.e., assuming :attr:`logits` is of shape\n            `[batch_size, distribution_dim]`\n        average_across_batch (bool): If set, average the entropy across the\n            batch dimension. Must not set `average_across_batch`'\n            and `sum_over_batch` at the same time.\n        average_across_remaining (bool): If set, average the entropy across the\n            remaining dimensions. Must not set `average_across_remaining`'\n            and `sum_over_remaining` at the same time.\n            Used only when :attr:`logits` has rank >= 3.\n        sum_over_batch (bool): If set, sum the entropy across the\n            batch dimension. Must not set `average_across_batch`\n            and `sum_over_batch` at the same time.\n        sum_over_remaining (bool): If set, sum the entropy across the\n            remaining dimension. Must not set `average_across_remaining`\n            and `sum_over_remaining` at the same time.\n            Used only when :attr:`logits` has rank >= 3.\n\n    Returns:\n        A Tensor containing the shannon entropy. The dimensionality of the\n        Tensor depends on the configuration of reduction arguments. 
For\n        example, if both batch and remaining dimensions are reduced (by\n        either sum or average), the returned Tensor is a scalar Tensor.\n    \"\"\"\n    entropy = _get_entropy(logits)\n\n    if rank is None:\n        rank = get_rank(logits)\n    if rank is None:\n        rank = 2\n    rank -= 1 # reduced last dimension\n\n    # Reduces\n    if average_across_batch and sum_over_batch:\n        raise ValueError(\"Only one of `average_across_batch` and \"\n                         \"`sum_over_batch` can be set.\")\n    if average_across_remaining and sum_over_remaining:\n        raise ValueError(\"Only one of `average_across_remaining` and \"\n                         \"`sum_over_remaining` can be set.\")\n    sum_axes, average_axes = [], []\n    if sum_over_batch:\n        sum_axes.append(0)\n    if average_across_batch:\n        average_axes.append(0)\n    if sum_over_remaining and rank >= 2:\n        sum_axes += list(range(1, rank))\n    if average_across_remaining and rank >= 2:\n        average_axes += list(range(1, rank))\n\n    entropy = reduce_dimensions(\n        entropy, average_axes=average_axes, sum_axes=sum_axes)\n\n    return entropy\n\ndef sequence_entropy_with_logits(logits,\n                                 rank=None,\n                                 sequence_length=None,\n                                 average_across_batch=True,\n                                 average_across_timesteps=False,\n                                 average_across_remaining=False,\n                                 sum_over_batch=False,\n                                 sum_over_timesteps=True,\n                                 sum_over_remaining=True,\n                                 time_major=False):\n    \"\"\"Shannon entropy given logits.\n\n    Args:\n        logits: Unscaled log probabilities of shape\n            `[batch_size, max_time, d_3, ..., d_{rank-1}, distribution_dim]`\n            and of dtype `float32` or `float64`.\n\n            The 
rank of the tensor is optionally specified by the argument\n            :attr:`rank`.\n\n            The tensor is considered as having `[batch_size, .., d_{rank-1}]`\n            elements, each of which has a distribution of length `d_rank`\n            (i.e., `distribution_dim`). So the last dimension is always\n            summed out to compute the entropy.\n\n            The batch and time dimensions are exchanged if :attr:`time_major`\n            is `True`.\n        rank (int, optional): The rank of :attr:`logits`.\n            If `None` (default), `rank` is inferred automatically from\n            `logits`. If the inference fails, `rank` is\n            set to 3, i.e., assuming `logits` is of shape\n            `[batch_size, max_time, distribution_dim]`\n        sequence_length (optional): A Tensor of shape `[batch_size]`.\n            Time steps beyond the respective sequence lengths are\n            counted into the entropy.\n        average_across_timesteps (bool): If set, average the entropy across\n            the time dimension. Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        average_across_batch (bool): If set, average the entropy across the\n            batch dimension. Must not set `average_across_batch`'\n            and `sum_over_batch` at the same time.\n        average_across_remaining (bool): If set, average the entropy across the\n            remaining dimensions. Must not set `average_across_remaining`'\n            and `sum_over_remaining` at the same time.\n            Used only when :attr:`logits` has rank >= 4.\n        sum_over_timesteps (bool): If set, sum the entropy across the\n            time dimension. Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        sum_over_batch (bool): If set, sum the entropy across the\n            batch dimension. 
Must not set `average_across_batch`\n            and `sum_over_batch` at the same time.\n        sum_over_remaining (bool): If set, sum the entropy across the\n            remaining dimension. Must not set `average_across_remaining`\n            and `sum_over_remaining` at the same time.\n            Used only when :attr:`logits` has rank >= 4.\n        time_major (bool): The shape format of the inputs. If `True`,\n            :attr:`logits` must have shape `[max_time, batch_size, ...]`.\n            If `False` (default), it must have shape\n            `[batch_size, max_time, ...]`.\n\n    Returns:\n        A Tensor containing the shannon entropy. The dimensionality of the\n        Tensor depends on the configuration of reduction arguments. For\n        example, if batch, time, and remaining dimensions are all reduced (by\n        either sum or average), the returned Tensor is a scalar Tensor.\n    \"\"\"\n    entropy = _get_entropy(logits)\n\n    if rank is None:\n        rank = get_rank(logits)\n    if rank is None:\n        rank = 3\n    rank -= 1 # reduced last dimension\n\n    entropy = mask_and_reduce(\n        entropy,\n        sequence_length,\n        rank=rank,\n        average_across_batch=average_across_batch,\n        average_across_timesteps=average_across_timesteps,\n        average_across_remaining=average_across_remaining,\n        sum_over_batch=sum_over_batch,\n        sum_over_timesteps=sum_over_timesteps,\n        sum_over_remaining=sum_over_remaining,\n        time_major=time_major)\n\n    return entropy\n"
  },
  {
    "path": "texar_repo/texar/losses/losses_utils.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious utilities for losses.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\n\nimport tensorflow as tf\nfrom tensorflow.python.ops import rnn          # pylint: disable=E0611\n\nfrom texar.utils.shapes import mask_sequences\n\n# pylint: disable=invalid-name, not-context-manager, protected-access,\n# pylint: disable=too-many-arguments\n\n__all__ = [\n    \"mask_and_reduce\",\n    \"reduce_batch_time\",\n    \"reduce_dimensions\"\n]\n\ndef mask_and_reduce(sequence,\n                    sequence_length,\n                    rank=2,\n                    average_across_batch=True,\n                    average_across_timesteps=False,\n                    average_across_remaining=False,\n                    sum_over_batch=False,\n                    sum_over_timesteps=True,\n                    sum_over_remaining=True,\n                    dtype=None,\n                    time_major=False):\n    \"\"\"Masks out sequence entries that are beyond the respective sequence\n    lengths, and reduces (average or sum) away dimensions.\n\n    This is a combination of :func:`~texar.utils.shapes.mask_sequences`\n    and :func:`~texar.losses.losses_utils.reduce_batch_time`.\n\n    Args:\n        sequence: A Tensor of sequence values.\n            If 
`time_major=False` (default), this must be a Tensor of shape\n            `[batch_size, max_time, d_2, ..., d_rank]`, where the rank of\n            the Tensor is specified with :attr:`rank`.\n            The batch and time dimensions are exchanged if `time_major` is True.\n        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond\n            the respective sequence lengths will be made zero. If `None`,\n            not masking is performed.\n        rank (int): The rank of :attr:`sequence`. Must be >= 2. Default is 2,\n            i.e., `sequence` is a 2D Tensor consisting of batch and time\n            dimensions.\n        average_across_timesteps (bool): If set, average the sequence across\n            the time dimension. Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        average_across_batch (bool): If set, average the sequence across the\n            batch dimension. Must not set `average_across_batch`'\n            and `sum_over_batch` at the same time.\n        average_across_remaining (bool): If set, average the sequence across the\n            remaining dimensions. Must not set `average_across_remaining`'\n            and `sum_over_remaining` at the same time.\n        sum_over_timesteps (bool): If set, sum the loss across the\n            time dimension. Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        sum_over_batch (bool): If set, sum the loss across the\n            batch dimension. Must not set `average_across_batch`\n            and `sum_over_batch` at the same time.\n        sum_over_remaining (bool): If set, sum the loss across the\n            remaining dimension. Must not set `average_across_remaining`\n            and `sum_over_remaining` at the same time.\n        time_major (bool): The shape format of the inputs. 
If `True`,\n            :attr:`sequence` must have shape `[max_time, batch_size, ...]`.\n            If `False` (default), `sequence` must have\n            shape `[batch_size, max_time, ...]`.\n        dtype (dtype): Type of :attr:`sequence`. If `None`, infer from\n            :attr:`sequence` automatically.\n\n    Returns\n        A Tensor containing the masked and reduced sequence.\n    \"\"\"\n    if rank < 2:\n        raise ValueError('`rank` must be >= 2.')\n\n    if time_major:\n        sequence = rnn._transpose_batch_time(sequence)\n\n    if sequence_length is not None:\n        sequence = mask_sequences(sequence, sequence_length, dtype=dtype,\n                                  time_major=False, tensor_rank=rank)\n\n    if rank > 2:\n        if average_across_remaining and sum_over_remaining:\n            raise ValueError(\"Only one of `average_across_remaining` and \"\n                             \"`sum_over_remaining` can be set.\")\n        if average_across_remaining:\n            sequence = tf.reduce_mean(sequence, axis=np.arange(2, rank))\n        elif sum_over_remaining:\n            sequence = tf.reduce_sum(sequence, axis=np.arange(2, rank))\n\n    sequence = reduce_batch_time(sequence,\n                                 sequence_length,\n                                 average_across_batch,\n                                 average_across_timesteps,\n                                 sum_over_batch,\n                                 sum_over_timesteps)\n\n    reduce_time = average_across_timesteps or sum_over_timesteps\n    reduce_batch = average_across_batch or sum_over_batch\n    if not reduce_time and not reduce_batch and time_major:\n        sequence = rnn._transpose_batch_time(sequence)\n\n    return sequence\n\n\ndef reduce_batch_time(sequence,\n                      sequence_length,\n                      average_across_batch=True,\n                      average_across_timesteps=False,\n                      sum_over_batch=False,\n           
           sum_over_timesteps=True):\n    \"\"\"Average or sum over the respective dimensions of :attr:`sequence`, which\n    is of shape `[batch_size, max_time, ...]`.\n\n    Assumes :attr:`sequence` has been properly masked according to\n    :attr:`sequence_length`.\n    \"\"\"\n    if average_across_timesteps and sum_over_timesteps:\n        raise ValueError(\"Only one of `average_across_timesteps` and \"\n                         \"`sum_over_timesteps` can be set.\")\n    if average_across_batch and sum_over_batch:\n        raise ValueError(\"Only one of `average_across_batch` and \"\n                         \"`sum_over_batch` can be set.\")\n\n    if sum_over_timesteps:\n        sequence = tf.reduce_sum(sequence, axis=[1])\n    elif average_across_timesteps:\n        if sequence_length is None:\n            sequence = tf.reduce_mean(sequence, axis=[1])\n        else:\n            sequence = tf.reduce_sum(sequence, axis=[1])\n            if average_across_timesteps:\n                sequence = sequence / tf.to_float(sequence_length)\n\n    if sum_over_batch:\n        sequence = tf.reduce_sum(sequence, axis=[0])\n    elif average_across_batch:\n        sequence = tf.reduce_mean(sequence, axis=[0])\n\n    return sequence\n\n\ndef reduce_dimensions(tensor, average_axes=None, sum_axes=None, keepdims=None):\n    \"\"\"Average or sum over dimensions of :attr:`tensor`.\n\n    :attr:`average_axes` and :attr:`sum_axes` must be mutually exclusive. 
That\n    is, elements in `average_axes` must not be contained in\n    `sum_axes`, and vice versa.\n\n    Args:\n        tensor: A tensor to reduce.\n        average_axes (optional): A (list of) `int` that indicates the\n            dimensions to reduce by taking average.\n        sum_axes (optional): A (list of) `int` that indicates the\n            dimensions to reduce by taking sum.\n        keepdims (optional): If `True`, retains reduced dimensions with\n            length 1.\n    \"\"\"\n    reduced_axes = []\n    if average_axes is not None and not isinstance(average_axes, (list, tuple)):\n        average_axes = [average_axes]\n    if average_axes is not None and len(average_axes) > 0:\n        tensor = tf.reduce_mean(tensor, axis=average_axes, keepdims=True)\n        reduced_axes += average_axes\n\n    if sum_axes is not None and not isinstance(sum_axes, (list, tuple)):\n        sum_axes = [sum_axes]\n    if sum_axes is not None and len(sum_axes) > 0:\n        tensor = tf.reduce_sum(tensor, axis=sum_axes, keepdims=True)\n        reduced_axes += sum_axes\n\n        if average_axes is not None:\n            if len(reduced_axes) != len(average_axes) + len(sum_axes):\n                raise ValueError('`average_axes` and `sum_axes` must not have '\n                                 'overlapped elements.')\n\n    if not keepdims:\n        tensor = tf.squeeze(tensor, axis=reduced_axes)\n\n    return tensor\n\n"
  },
  {
    "path": "texar_repo/texar/losses/mle_losses.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious losses\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.losses.losses_utils import mask_and_reduce, reduce_dimensions\nfrom texar.utils import shapes\n\n# pylint: disable=invalid-name, not-context-manager, protected-access,\n# pylint: disable=too-many-arguments\n\n__all__ = [\n    \"sequence_softmax_cross_entropy\",\n    \"sequence_sparse_softmax_cross_entropy\",\n    \"sequence_sigmoid_cross_entropy\",\n    \"binary_sigmoid_cross_entropy\",\n    \"binary_sigmoid_cross_entropy_with_clas\"\n]\n\ndef sequence_softmax_cross_entropy(labels,\n                                   logits,\n                                   sequence_length,\n                                   average_across_batch=True,\n                                   average_across_timesteps=False,\n                                   sum_over_batch=False,\n                                   sum_over_timesteps=True,\n                                   time_major=False,\n                                   stop_gradient_to_label=False,\n                                   name=None):\n    \"\"\"Computes softmax cross entropy for each time step of sequence\n    predictions.\n\n    Args:\n        labels: Target class distributions.\n\n            - If 
:attr:`time_major` is `False` (default), this must be a\\\n            Tensor of shape `[batch_size, max_time, num_classes]`.\n\n            - If `time_major` is `True`, this must be a Tensor of shape\\\n            `[max_time, batch_size, num_classes]`.\n\n            Each row of `labels` should be a valid probability\n            distribution, otherwise, the computation of the gradient will be\n            incorrect.\n        logits: Unscaled log probabilities. This must have the shape of\n            `[max_time, batch_size, num_classes]` or\n            `[batch_size, max_time, num_classes]` according to\n            the value of `time_major`.\n        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond\n            the respective sequence lengths will have zero losses.\n        average_across_timesteps (bool): If set, average the loss across\n            the time dimension. Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        average_across_batch (bool): If set, average the loss across the\n            batch dimension. Must not set `average_across_batch`'\n            and `sum_over_batch` at the same time.\n        sum_over_timesteps (bool): If set, sum the loss across the\n            time dimension. Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        sum_over_batch (bool): If set, sum the loss across the\n            batch dimension. Must not set `average_across_batch`\n            and `sum_over_batch` at the same time.\n        time_major (bool): The shape format of the inputs. If `True`,\n            :attr:`labels` and :attr:`logits` must have shape\n            `[max_time, batch_size, ...]`. 
If `False`\n            (default), they must have shape `[batch_size, max_time, ...]`.\n        stop_gradient_to_label (bool): If set, gradient propagation to\n            :attr:`labels` will be disabled.\n        name (str, optional): A name for the operation.\n\n    Returns:\n        A Tensor containing the loss, of rank 0, 1, or 2 depending on the\n        arguments :attr:`{average_across}/{sum_over}_{timesteps}/{batch}`.\n        For example:\n\n        - If :attr:`sum_over_timesteps` and :attr:`average_across_batch`  \\\n        are `True` (default), the return Tensor is of rank 0.\n\n        - If :attr:`average_across_batch` is `True` and other arguments are \\\n        `False`, the return Tensor is of shape `[max_time]`.\n    \"\"\"\n    with tf.name_scope(name, \"sequence_softmax_cross_entropy\"):\n        if stop_gradient_to_label:\n            labels = tf.stop_gradient(labels)\n\n        losses = tf.nn.softmax_cross_entropy_with_logits_v2(\n            labels=labels, logits=logits)\n\n        losses = mask_and_reduce(\n            losses,\n            sequence_length,\n            rank=2,\n            average_across_batch=average_across_batch,\n            average_across_timesteps=average_across_timesteps,\n            sum_over_batch=sum_over_batch,\n            sum_over_timesteps=sum_over_timesteps,\n            time_major=time_major)\n\n        return losses\n\ndef sequence_sparse_softmax_cross_entropy(labels,\n                                          logits,\n                                          sequence_length,\n                                          average_across_batch=True,\n                                          average_across_timesteps=False,\n                                          sum_over_batch=False,\n                                          sum_over_timesteps=True,\n                                          time_major=False,\n                                          name=None):\n    \"\"\"Computes sparse softmax cross entropy 
for each time step of sequence\n    predictions.\n\n    Args:\n        labels: Target class indexes. I.e., classes are mutually exclusive\n            (each entry is in exactly one class).\n\n            - If :attr:`time_major` is `False` (default), this must be\\\n            a Tensor of shape `[batch_size, max_time]`.\n\n            - If `time_major` is `True`, this must be a Tensor of shape\\\n            `[max_time, batch_size].`\n        logits: Unscaled log probabilities. This must have the shape of\n            `[max_time, batch_size, num_classes]` or\n            `[batch_size, max_time, num_classes]` according to\n            the value of `time_major`.\n        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond\n            the respective sequence lengths will have zero losses.\n        average_across_timesteps (bool): If set, average the loss across\n            the time dimension. Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        average_across_batch (bool): If set, average the loss across the\n            batch dimension. Must not set `average_across_batch`'\n            and `sum_over_batch` at the same time.\n        sum_over_timesteps (bool): If set, sum the loss across the\n            time dimension. Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        sum_over_batch (bool): If set, sum the loss across the\n            batch dimension. Must not set `average_across_batch`\n            and `sum_over_batch` at the same time.\n        time_major (bool): The shape format of the inputs. If `True`,\n            :attr:`labels` and :attr:`logits` must have shape\n            `[max_time, batch_size, ...]`. 
If `False`\n            (default), they must have shape `[batch_size, max_time, ...]`.\n        name (str, optional): A name for the operation.\n\n    Returns:\n        A Tensor containing the loss, of rank 0, 1, or 2 depending on the\n        arguments :attr:`{average_across}/{sum_over}_{timesteps}/{batch}`.\n        For example:\n\n        - If :attr:`sum_over_timesteps` and :attr:`average_across_batch`  \\\n        are `True` (default), the return Tensor is of rank 0.\n\n        - If :attr:`average_across_batch` is `True` and other arguments are \\\n        `False`, the return Tensor is of shape `[max_time]`.\n\n    Example:\n\n        .. code-block:: python\n\n            embedder = WordEmbedder(vocab_size=data.vocab.size)\n            decoder = BasicRNNDecoder(vocab_size=data.vocab.size)\n            outputs, _, _ = decoder(\n                decoding_strategy='train_greedy',\n                inputs=embedder(data_batch['text_ids']),\n                sequence_length=data_batch['length']-1)\n\n            loss = sequence_sparse_softmax_cross_entropy(\n                labels=data_batch['text_ids'][:, 1:],\n                logits=outputs.logits,\n                sequence_length=data_batch['length']-1)\n\n    \"\"\"\n    with tf.name_scope(name, \"sequence_sparse_softmax_cross_entropy\"):\n        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(\n            labels=labels, logits=logits)\n\n        losses = mask_and_reduce(\n            losses,\n            sequence_length,\n            rank=2,\n            average_across_batch=average_across_batch,\n            average_across_timesteps=average_across_timesteps,\n            sum_over_batch=sum_over_batch,\n            sum_over_timesteps=sum_over_timesteps,\n            time_major=time_major)\n\n        return losses\n\ndef sequence_sigmoid_cross_entropy(labels,\n                                   logits,\n                                   sequence_length,\n                                   
average_across_batch=True,\n                                   average_across_timesteps=False,\n                                   average_across_classes=True,\n                                   sum_over_batch=False,\n                                   sum_over_timesteps=True,\n                                   sum_over_classes=False,\n                                   time_major=False,\n                                   stop_gradient_to_label=False,\n                                   name=None):\n    \"\"\"Computes sigmoid cross entropy for each time step of sequence\n    predictions.\n\n    Args:\n        labels: Target class distributions.\n\n            - If :attr:`time_major` is `False` (default), this must be a\\\n            Tensor of shape `[batch_size, max_time(, num_classes)]`.\n\n            - If `time_major` is `True`, this must be a Tensor of shape\\\n            `[max_time, batch_size(, num_classes)]`.\n\n            Each row of `labels` should be a valid probability\n            distribution, otherwise, the computation of the gradient will be\n            incorrect.\n        logits: Unscaled log probabilities having the same shape as with\n            :attr:`labels`.\n        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond\n            the respective sequence lengths will have zero losses.\n        average_across_timesteps (bool): If set, average the loss across\n            the time dimension. Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        average_across_batch (bool): If set, average the loss across the\n            batch dimension. Must not set `average_across_batch`'\n            and `sum_over_batch` at the same time.\n        average_across_classes (bool): If set, average the loss across the\n            class dimension (if exists). Must not set\n            `average_across_classes`' and `sum_over_classes` at\n            the same time. 
Ignored if :attr:`logits` is a 2D Tensor.\n        sum_over_timesteps (bool): If set, sum the loss across the\n            time dimension. Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        sum_over_batch (bool): If set, sum the loss across the\n            batch dimension. Must not set `average_across_batch`\n            and `sum_over_batch` at the same time.\n        sum_over_classes (bool): If set, sum the loss across the\n            class dimension. Must not set `average_across_classes`\n            and `sum_over_classes` at the same time. Ignored if\n            :attr:`logits` is a 2D Tensor.\n        time_major (bool): The shape format of the inputs. If `True`,\n            :attr:`labels` and :attr:`logits` must have shape\n            `[max_time, batch_size, ...]`. If `False`\n            (default), they must have shape `[batch_size, max_time, ...]`.\n        stop_gradient_to_label (bool): If set, gradient propagation to\n            :attr:`labels` will be disabled.\n        name (str, optional): A name for the operation.\n\n    Returns:\n        A Tensor containing the loss, of rank 0, 1, or 2 depending on the\n        arguments\n        :attr:`{average_across}/{sum_over}_{timesteps}/{batch}/{classes}`.\n        For example, if the class dimension does not exist, and\n\n        - If :attr:`sum_over_timesteps` and :attr:`average_across_batch`  \\\n        are `True` (default), the return Tensor is of rank 0.\n\n        - If :attr:`average_across_batch` is `True` and other arguments are \\\n        `False`, the return Tensor is of shape `[max_time]`.\n    \"\"\"\n\n    with tf.name_scope(name, \"sequence_sigmoid_cross_entropy\"):\n        if stop_gradient_to_label:\n            labels = tf.stop_gradient(labels)\n\n        losses = tf.nn.sigmoid_cross_entropy_with_logits(\n            labels=labels, logits=logits)\n\n        rank = shapes.get_rank(logits) or shapes.get_rank(labels)\n        if rank is 
None:\n            raise ValueError(\n                'Cannot determine the rank of `logits` or `labels`.')\n\n        losses = mask_and_reduce(\n            losses,\n            sequence_length,\n            rank=rank,\n            average_across_batch=average_across_batch,\n            average_across_timesteps=average_across_timesteps,\n            average_across_remaining=average_across_classes,\n            sum_over_batch=sum_over_batch,\n            sum_over_timesteps=sum_over_timesteps,\n            sum_over_remaining=sum_over_classes,\n            time_major=time_major)\n\n        return losses\n\ndef binary_sigmoid_cross_entropy(pos_logits=None,\n                                 neg_logits=None,\n                                 average_across_batch=True,\n                                 average_across_classes=True,\n                                 sum_over_batch=False,\n                                 sum_over_classes=False,\n                                 return_pos_neg_losses=False,\n                                 name=None):\n    \"\"\"Computes sigmoid cross entropy of binary predictions.\n\n    Args:\n        pos_logits: The logits of predicting positive on positive data. A\n            tensor of shape `[batch_size(, num_classes)]`.\n        neg_logits: The logits of predicting positive on negative data. A\n            tensor of shape `[batch_size(, num_classes)]`.\n        average_across_batch (bool): If set, average the loss across the\n            batch dimension. Must not set `average_across_batch`'\n            and `sum_over_batch` at the same time.\n        average_across_classes (bool): If set, average the loss across the\n            class dimension (if exists). Must not set\n            `average_across_classes`' and `sum_over_classes` at\n            the same time. Ignored if :attr:`logits` is a 1D Tensor.\n        sum_over_batch (bool): If set, sum the loss across the\n            batch dimension. 
Must not set `average_across_batch`\n            and `sum_over_batch` at the same time.\n        sum_over_classes (bool): If set, sum the loss across the\n            class dimension. Must not set `average_across_classes`\n            and `sum_over_classes` at the same time. Ignored if\n            :attr:`logits` is a 1D Tensor.\n        return_pos_neg_losses (bool): If set, additionally returns the losses\n            on :attr:`pos_logits` and :attr:`neg_logits`, respectively.\n        name (str, optional): A name for the operation.\n\n    Returns:\n        By default, a Tensor containing the loss, of rank 0, 1, or 2 depending\n        on the arguments :attr:`{average_across}/{sum_over}_{batch}/{classes}`.\n        For example:\n\n            - If :attr:`sum_over_batch` and :attr:`average_across_classes`  \\\n            are `True` (default), the return Tensor is of rank 0.\n\n            - If all arguments are `False`, the return Tensor is of shape \\\n            `[batch_size(, num_classes)]`.\n\n        If :attr:`return_pos_neg_losses` is `True`, returns a tuple\n        `(loss, pos_loss, neg_loss)`, where `loss` is the loss above;\n        `pos_loss` is the loss on `pos_logits` only; and\n        `neg_loss` is the loss on `neg_logits` only. 
They have\n        `loss = pos_loss + neg_loss`.\n    \"\"\"\n    with tf.name_scope(name, \"binary_sigmoid_cross_entropy\"):\n        average_axes, sum_axes = [], []\n        average_axes += [0] if average_across_batch else []\n        average_axes += [1] if average_across_classes else []\n        sum_axes += [0] if sum_over_batch else []\n        sum_axes += [1] if sum_over_classes else []\n\n        pos_loss = 0\n        if pos_logits is not None:\n            pos_loss = tf.nn.sigmoid_cross_entropy_with_logits(\n                logits=pos_logits, labels=tf.ones_like(pos_logits))\n\n            pos_loss = reduce_dimensions(pos_loss, average_axes, sum_axes)\n\n        neg_loss = 0\n        if neg_logits is not None:\n            neg_loss = tf.nn.sigmoid_cross_entropy_with_logits(\n                logits=neg_logits, labels=tf.zeros_like(neg_logits))\n\n            neg_loss = reduce_dimensions(neg_loss, average_axes, sum_axes)\n\n    loss = pos_loss + neg_loss\n\n    if return_pos_neg_losses:\n        return loss, pos_loss, neg_loss\n    else:\n        return loss\n\ndef binary_sigmoid_cross_entropy_with_clas(clas_fn,\n                                           pos_inputs=None,\n                                           neg_inputs=None,\n                                           average_across_batch=True,\n                                           average_across_classes=True,\n                                           sum_over_batch=False,\n                                           sum_over_classes=False,\n                                           return_pos_neg_losses=False,\n                                           name=None):\n    \"\"\"Computes sigmoid cross entropy of binary classifier.\n\n    .. role:: python(code)\n       :language: python\n\n    Args:\n        clas_fn: A callable takes data (e.g., :attr:`pos_inputs` and\n            :attr:`fake_inputs`) and returns the logits of being positive. 
The\n            signature of `clas_fn` must be:\n            :python:`logits (, ...) = clas_fn(inputs)`.\n            The return value of `clas_fn` can be the logits, or\n            a tuple where the logits are the first element.\n        pos_inputs: The positive data fed into `clas_fn`.\n        neg_inputs: The negative data fed into `clas_fn`.\n        average_across_batch (bool): If set, average the loss across the\n            batch dimension. Must not set `average_across_batch`'\n            and `sum_over_batch` at the same time.\n        average_across_classes (bool): If set, average the loss across the\n            class dimension (if exists). Must not set\n            `average_across_classes`' and `sum_over_classes` at\n            the same time. Ignored if :attr:`logits` is a 1D Tensor.\n        sum_over_batch (bool): If set, sum the loss across the\n            batch dimension. Must not set `average_across_batch`\n            and `sum_over_batch` at the same time.\n        sum_over_classes (bool): If set, sum the loss across the\n            class dimension. Must not set `average_across_classes`\n            and `sum_over_classes` at the same time. 
Ignored if\n            :attr:`logits` is a 1D Tensor.\n        return_pos_neg_losses (bool): If set, additionally returns the losses\n            on :attr:`pos_logits` and :attr:`neg_logits`, respectively.\n        name (str, optional): A name for the operation.\n\n    Returns:\n        By default, a Tensor containing the loss, of rank 0, 1, or 2 depending\n        on the arguments :attr:`{average_across}/{sum_over}_{batch}/{classes}`.\n        For example:\n\n            - If :attr:`sum_over_batch` and :attr:`average_across_classes`  \\\n            are `True` (default), the return Tensor is of rank 0.\n\n            - If all arguments are `False`, the return Tensor is of shape \\\n            `[batch_size(, num_classes)]`.\n\n        If :attr:`return_pos_neg_losses`=`True`, returns a tuple\n        `(loss, pos_loss, neg_loss)`, where `loss` is the loss above;\n        `pos_loss` is the loss on `pos_logits` only; and\n        `neg_loss` is the loss on `neg_logits` only. They have\n        `loss = pos_loss + neg_loss`.\n    \"\"\"\n    pos_logits = None\n    if pos_inputs is not None:\n        pos_logits = clas_fn(pos_inputs)\n        if isinstance(pos_logits, (list, tuple)):\n            pos_logits = pos_logits[0]\n\n    neg_logits = None\n    if neg_inputs is not None:\n        neg_logits = clas_fn(neg_inputs)\n        if isinstance(neg_logits, (list, tuple)):\n            neg_logits = neg_logits[0]\n\n    return binary_sigmoid_cross_entropy(\n        pos_logits=pos_logits,\n        neg_logits=neg_logits,\n        average_across_batch=average_across_batch,\n        average_across_classes=average_across_classes,\n        sum_over_batch=sum_over_batch,\n        sum_over_classes=sum_over_classes,\n        return_pos_neg_losses=return_pos_neg_losses,\n        name=name)\n"
  },
  {
    "path": "texar_repo/texar/losses/mle_losses_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for mle losses.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\n# pylint: disable=invalid-name\n\nimport numpy as np\n\nimport tensorflow as tf\n\nimport texar as tx\n\nclass MLELossesTest(tf.test.TestCase):\n    \"\"\"Tests mle losses.\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n        self._batch_size = 64\n        self._max_time = 16\n        self._num_classes = 100\n        self._labels = tf.ones([self._batch_size, self._max_time],\n                               dtype=tf.int32)\n        one_hot_labels = tf.one_hot(\n            self._labels, self._num_classes, dtype=tf.float32)\n        self._one_hot_labels = tf.reshape(\n            one_hot_labels, [self._batch_size, self._max_time, -1])\n        self._logits = tf.random_uniform(\n            [self._batch_size, self._max_time, self._num_classes])\n        self._sequence_length = tf.random_uniform(\n            [self._batch_size], maxval=self._max_time, dtype=tf.int32)\n\n    def _test_sequence_loss(self, loss_fn, labels, logits, sequence_length):\n        with self.test_session() as sess:\n            loss = loss_fn(labels, logits, sequence_length)\n            rank = sess.run(tf.rank(loss))\n            self.assertEqual(rank, 0)\n\n            loss = loss_fn(\n                labels, logits, sequence_length, sum_over_timesteps=False)\n            rank = sess.run(tf.rank(loss))\n            self.assertEqual(rank, 1)\n            self.assertEqual(loss.shape, tf.TensorShape([self._max_time]))\n\n            loss = loss_fn(\n                labels, logits, sequence_length, sum_over_timesteps=False,\n                average_across_timesteps=True, average_across_batch=False)\n            rank = sess.run(tf.rank(loss))\n            self.assertEqual(rank, 1)\n            self.assertEqual(loss.shape, 
tf.TensorShape([self._batch_size]))\n\n            loss = loss_fn(\n                labels, logits, sequence_length, sum_over_timesteps=False,\n                average_across_batch=False)\n            rank = sess.run(tf.rank(loss))\n            self.assertEqual(rank, 2)\n            self.assertEqual(loss.shape,\n                             tf.TensorShape([self._batch_size, self._max_time]))\n\n            sequence_length_time = tf.random_uniform(\n                [self._max_time], maxval=self._max_time, dtype=tf.int32)\n            loss = loss_fn(\n                labels, logits, sequence_length_time, sum_over_timesteps=False,\n                average_across_batch=False, time_major=True)\n            self.assertEqual(loss.shape,\n                             tf.TensorShape([self._batch_size, self._max_time]))\n\n    def test_sequence_softmax_cross_entropy(self):\n        \"\"\"Tests `sequence_softmax_cross_entropy`\n        \"\"\"\n        self._test_sequence_loss(\n            tx.losses.sequence_softmax_cross_entropy,\n            self._one_hot_labels, self._logits, self._sequence_length)\n\n    def test_sequence_sparse_softmax_cross_entropy(self):\n        \"\"\"Tests `sequence_sparse_softmax_cross_entropy`\n        \"\"\"\n        self._test_sequence_loss(\n            tx.losses.sequence_sparse_softmax_cross_entropy,\n            self._labels, self._logits, self._sequence_length)\n\n    def test_sequence_sigmoid_cross_entropy(self):\n        \"\"\"Tests `texar.losses.test_sequence_sigmoid_cross_entropy`.\n        \"\"\"\n        self._test_sequence_loss(\n            tx.losses.sequence_sigmoid_cross_entropy,\n            self._one_hot_labels, self._logits, self._sequence_length)\n\n        self._test_sequence_loss(\n            tx.losses.sequence_sigmoid_cross_entropy,\n            self._one_hot_labels[:, :, 0],\n            self._logits[:, :, 0],\n            self._sequence_length)\n\n        labels = tf.placeholder(dtype=tf.int32, shape=None)\n        loss = 
tx.losses.sequence_sigmoid_cross_entropy(\n            logits=self._logits[:, :, 0],\n            labels=tf.to_float(labels),\n            sequence_length=self._sequence_length)\n        with self.test_session() as sess:\n            rank = sess.run(\n                tf.rank(loss),\n                feed_dict={labels: np.ones([self._batch_size, self._max_time])})\n            self.assertEqual(rank, 0)\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/losses/pg_losses.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious loss functions for policy gradients.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.losses.losses_utils import mask_and_reduce\nfrom texar.utils.shapes import get_rank\n\n# pylint: disable=too-many-arguments, protected-access\n\n__all__ = [\n    \"pg_loss_with_logits\",\n    \"pg_loss_with_log_probs\"\n]\n\ndef pg_loss_with_logits(actions,\n                        logits,\n                        advantages,\n                        rank=None,\n                        batched=False,\n                        sequence_length=None,\n                        average_across_batch=True,\n                        average_across_timesteps=False,\n                        average_across_remaining=False,\n                        sum_over_batch=False,\n                        sum_over_timesteps=True,\n                        sum_over_remaining=True,\n                        time_major=False):\n    \"\"\"Policy gradient loss with logits. 
Used for discrete actions.\n\n    `pg_loss = reduce( advantages * -log_prob( actions )  )`,\n    where `advantages` and `actions` do not back-propagate gradients.\n\n    All arguments except :attr:`logits` and :attr:`actions` are the same with\n    :func:`pg_loss_with_log_probs`.\n\n    Args:\n        actions: Tensor of shape\n            `[(batch_size,) max_time, d_3, ..., d_rank]` and of dtype\n            `int32` or `int64`.\n            The rank of the Tensor is specified with :attr:`rank`.\n\n            The batch dimension exists only if :attr:`batched` is `True`.\n\n            The batch and time dimensions\n            are exchanged, i.e., `[max_time, batch_size, ...]` if\n            :attr:`time_major` is `True`.\n        logits: Unscaled log probabilities of shape\n            `[(batch_size,) max_time, d_3, ..., d_{rank+1}]`\n            and dtype `float32` or `float64`.\n            The batch and time dimensions are exchanged if `time_major`\n            is `True`.\n        advantages: Tensor of shape\n            `[(batch_size,) max_time, d_3, ..., d_rank]` and\n            dtype `float32` or `float64`.\n            The batch and time dimensions are exchanged if `time_major`\n            is `True`.\n        rank (int, optional): The rank of :attr:`actions`.\n            If `None` (default), rank is automatically inferred from\n            `actions` or `advantages`. If the inference fails,\n            `rank` is set to 1 if :attr:`batched` is `False`,\n            and set to 2 if :attr:`batched` is `True`.\n        batched (bool): `True` if the inputs are batched.\n        sequence_length (optional): A Tensor of shape `[batch_size]`.\n            Time steps beyond the respective sequence lengths will have zero\n            losses. Used if :attr:`batched` is `True`.\n        average_across_timesteps (bool): If set, average the loss across\n            the time dimension. 
Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        average_across_batch (bool): If set, average the loss across the\n            batch dimension. Must not set `average_across_batch`'\n            and `sum_over_batch` at the same time.\n            Ignored if `batched` is `False`.\n        average_across_remaining (bool): If set, average the sequence across the\n            remaining dimensions. Must not set `average_across_remaining`'\n            and `sum_over_remaining` at the same time. Ignored if\n            no more dimensions other than the batch and time dimensions.\n        sum_over_timesteps (bool): If set, sum the loss across the\n            time dimension. Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        sum_over_batch (bool): If set, sum the loss across the\n            batch dimension. Must not set `average_across_batch`\n            and `sum_over_batch` at the same time.\n            Ignored if `batched` is `False`.\n        sum_over_remaining (bool): If set, sum the loss across the\n            remaining dimension. Must not set `average_across_remaining`\n            and `sum_over_remaining` at the same time. Ignored if\n            no more dimensions other than the batch and time dimensions.\n        time_major (bool): The shape format of the inputs. If `True`,\n            :attr:`logits`, :attr:`actions` and :attr:`advantages` must\n            have shape `[max_time, batch_size, ...]`. If `False` (default),\n            they must have shape `[batch_size, max_time, ...]`.\n            Ignored if `batched` is `False`.\n\n    Returns:\n        A Tensor containing the loss to minimize, whose rank depends on the\n        reduce arguments. 
For example, the batch dimension is reduced if\n        either :attr:`average_across_batch` or :attr:`sum_over_batch` is\n        `True`, which decreases the rank of output tensor by 1.\n    \"\"\"\n    actions = tf.stop_gradient(actions)\n    neg_log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(\n        logits=logits, labels=actions)\n    return pg_loss_with_log_probs(\n        log_probs=-neg_log_probs,\n        advantages=advantages,\n        rank=rank,\n        batched=batched,\n        sequence_length=sequence_length,\n        average_across_batch=average_across_batch,\n        average_across_timesteps=average_across_timesteps,\n        average_across_remaining=average_across_remaining,\n        sum_over_batch=sum_over_batch,\n        sum_over_timesteps=sum_over_timesteps,\n        sum_over_remaining=sum_over_remaining,\n        time_major=time_major)\n\ndef pg_loss_with_log_probs(log_probs,\n                           advantages,\n                           rank=None,\n                           batched=False,\n                           sequence_length=None,\n                           average_across_batch=True,\n                           average_across_timesteps=False,\n                           average_across_remaining=False,\n                           sum_over_batch=False,\n                           sum_over_timesteps=True,\n                           sum_over_remaining=True,\n                           time_major=False):\n    \"\"\"Policy gradient loss with log probs of actions.\n\n    `pg_loss = reduce( advantages * -log_probs )`,\n    where `advantages` does not back-propagate gradients.\n\n    All arguments except :attr:`log_probs` are the same as\n    :func:`pg_loss_with_logits`.\n\n    Args:\n        log_probs: Log probabilities of shape\n            `[(batch_size,) max_time, ..., d_rank]` and dtype `float32`\n            or `float64`. 
The rank of the Tensor is specified\n            with :attr:`rank`.\n\n            The batch dimension exists only if :attr:`batched` is `True`.\n\n            The batch and time dimensions are exchanged, i.e.,\n            `[max_time, batch_size, ...]` if :attr:`time_major` is `True`.\n        advantages: Tensor of shape\n            `[(batch_size,) max_time, d_3, ..., d_rank]` and\n            dtype `float32` or `float64`.\n            The batch dimension exists only if `batched` is `True`.\n            The batch and time dimensions\n            are exchanged if `time_major` is `True`.\n        rank (int, optional): The rank of :attr:`log_probs`.\n            If `None` (default), rank is automatically inferred from\n            `log_probs` or `advantages`. If the inference fails,\n            `rank` is set to 1 if `batched``==False`,\n            and set to 2 if `batched``==True`.\n        batched (bool): `True` if the inputs are batched.\n        sequence_length (optional): A Tensor of shape `[batch_size]`.\n            Time steps beyond the respective sequence lengths will have zero\n            losses. Used if :attr:`batched` is `True`.\n        average_across_timesteps (bool): If set, average the loss across\n            the time dimension. Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        average_across_batch (bool): If set, average the loss across the\n            batch dimension. Must not set `average_across_batch`'\n            and `sum_over_batch` at the same time.\n            Ignored if `batched` is `False`.\n        average_across_remaining (bool): If set, average the sequence across the\n            remaining dimensions. Must not set `average_across_remaining`'\n            and `sum_over_remaining` at the same time. Ignored if\n            no more dimensions other than the batch and time dimensions.\n        sum_over_timesteps (bool): If set, sum the loss across the\n            time dimension. 
Must not set `average_across_timesteps`\n            and `sum_over_timesteps` at the same time.\n        sum_over_batch (bool): If set, sum the loss across the\n            batch dimension. Must not set `average_across_batch`\n            and `sum_over_batch` at the same time.\n            Ignored if `batched` is `False`.\n        sum_over_remaining (bool): If set, sum the loss across the\n            remaining dimension. Must not set `average_across_remaining`\n            and `sum_over_remaining` at the same time. Ignored if\n            no more dimensions other than the batch and time dimensions.\n        time_major (bool): The shape format of the inputs. If `True`,\n            :attr:`log_probs` and :attr:`advantages` must have shape\n            `[max_time, batch_size, ...]`. If `False` (default),\n            they must have shape `[batch_size, max_time, ...]`.\n            Ignored if :attr:`batched` is `False`.\n\n    Returns:\n        A Tensor containing the loss to minimize, whose rank depends on the\n        reduce arguments. 
For example, the batch dimension is reduced if\n        either :attr:`average_across_batch` or :attr:`sum_over_batch` is\n        `True`, which decreases the rank of output tensor by 1.\n    \"\"\"\n    advantages = tf.stop_gradient(advantages)\n\n    losses = -log_probs * advantages\n\n    if rank is None:\n        rank = get_rank(log_probs) or get_rank(advantages)\n    if rank is None:\n        rank = 2 if batched else 1\n\n    if batched:\n        losses = mask_and_reduce(\n            losses,\n            sequence_length,\n            rank=rank,\n            average_across_batch=average_across_batch,\n            average_across_timesteps=average_across_timesteps,\n            average_across_remaining=average_across_remaining,\n            sum_over_batch=sum_over_batch,\n            sum_over_timesteps=sum_over_timesteps,\n            sum_over_remaining=sum_over_remaining,\n            time_major=time_major)\n    elif rank > 1:\n        if average_across_remaining and sum_over_remaining:\n            raise ValueError(\"Only one of `average_across_remaining` and \"\n                             \"`sum_over_remaining` can be set.\")\n        if average_across_remaining:\n            losses = tf.reduce_mean(losses, axis=range(1, rank))\n        elif sum_over_remaining:\n            losses = tf.reduce_sum(losses, axis=range(1, rank))\n\n    if not batched:\n        if average_across_timesteps and sum_over_timesteps:\n            raise ValueError(\"Only one of `average_across_timesteps` and \"\n                             \"`sum_over_timesteps` can be set.\")\n        if average_across_timesteps:\n            losses = tf.reduce_mean(losses)\n        elif sum_over_timesteps:\n            losses = tf.reduce_sum(losses)\n\n    return losses\n"
  },
  {
    "path": "texar_repo/texar/losses/rewards.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious reward related functions.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.utils.shapes import mask_sequences\n\n# pylint: disable=invalid-name, too-many-arguments, no-member\n\n__all__ = [\n    \"discount_reward\",\n    \"_discount_reward_py_1d\",\n    \"_discount_reward_tensor_1d\",\n    \"_discount_reward_py_2d\",\n    \"_discount_reward_tensor_2d\"\n]\n\ndef discount_reward(reward,\n                    sequence_length=None,\n                    discount=1.,\n                    normalize=False,\n                    dtype=None,\n                    tensor_rank=1):\n    \"\"\"Computes discounted reward.\n\n    :attr:`reward` and :attr:`sequence_length` can be either Tensors or python\n    arrays. If both are python array (or `None`), the return will be a python\n    array as well. Otherwise tf Tensors are returned.\n\n    Args:\n        reward: A Tensor or python array. Can be 1D with shape `[batch_size]`,\n            or 2D with shape `[batch_size, max_time]`.\n        sequence_length (optional): A Tensor or python array of shape\n            `[batch_size]`. Time steps beyond the respective sequence lengths\n            will be masked. 
Required if :attr:`reward` is 1D.\n        discount (float): A scalar. The discount factor.\n        normalize (bool): Whether to normalize the discounted reward, by\n            `(discounted_reward - mean) / std`. Here `mean` and `std` are\n            over all time steps and all samples in the batch.\n        dtype (dtype): Type of :attr:`reward`. If `None`, infer from\n            `reward` automatically.\n        tensor_rank (int): The number of dimensions of :attr:`reward`.\n            Default is 1, i.e., :attr:`reward` is a 1D Tensor consisting\n            of a batch dimension. Ignored if :attr:`reward`\n            and :attr:`sequence_length` are python arrays (or `None`).\n\n    Returns:\n        A 2D Tensor or python array of the discounted reward.\n\n        If :attr:`reward` and :attr:`sequence_length` are python\n        arrays (or `None`), the returned value is a python array as well.\n\n\n    Example:\n\n        .. code-block:: python\n\n            r = [2., 1.]\n            seq_length = [3, 2]\n            discounted_r = discount_reward(r, seq_length, discount=0.1)\n            # discounted_r == [[2. * 0.1^2, 2. * 0.1, 2.],\n            #                  [1. * 0.1,   1.,       0.]]\n\n            r = [[3., 4., 5.], [6., 7., 0.]]\n            seq_length = [3, 2]\n            discounted_r = discount_reward(r, seq_length, discount=0.1)\n            # discounted_r == [[3. + 4.*0.1 + 5.*0.1^2, 4. + 5.*0.1, 5.],\n            #                  [6. 
+ 7.*0.1,            7.,          0.]]\n    \"\"\"\n    is_tensor = tf.contrib.framework.is_tensor\n    if is_tensor(reward) or is_tensor(sequence_length):\n        if tensor_rank == 1:\n            disc_reward = _discount_reward_tensor_1d(\n                reward, sequence_length, discount, dtype)\n        elif tensor_rank == 2:\n            disc_reward = _discount_reward_tensor_2d(\n                reward, sequence_length, discount, dtype)\n        else:\n            raise ValueError(\"`tensor_rank` can only be 1 or 2.\")\n\n        if normalize:\n            mu, var = tf.nn.moments(disc_reward, axes=[0, 1], keep_dims=True)\n            disc_reward = (disc_reward - mu) / (tf.sqrt(var) + 1e-8)\n    else:\n        reward = np.array(reward)\n        tensor_rank = reward.ndim\n        if tensor_rank == 1:\n            disc_reward = _discount_reward_py_1d(\n                reward, sequence_length, discount, dtype)\n        elif tensor_rank == 2:\n            disc_reward = _discount_reward_py_2d(\n                reward, sequence_length, discount, dtype)\n        else:\n            raise ValueError(\"`reward` can only be 1D or 2D.\")\n\n        if normalize:\n            mu = np.mean(disc_reward)\n            std = np.std(disc_reward)\n            disc_reward = (disc_reward - mu) / (std + 1e-8)\n\n    return disc_reward\n\ndef _discount_reward_py_1d(reward, sequence_length, discount=1., dtype=None):\n    if sequence_length is None:\n        raise ValueError('sequence_length must not be `None` for 1D reward.')\n\n    reward = np.array(reward)\n    sequence_length = np.array(sequence_length)\n\n    batch_size = reward.shape[0]\n    max_seq_length = np.max(sequence_length)\n    dtype = dtype or reward.dtype\n\n    if discount == 1.:\n        dmat = np.ones([batch_size, max_seq_length], dtype=dtype)\n    else:\n        steps = np.tile(np.arange(max_seq_length), [batch_size, 1])\n        mask = np.asarray(steps < (sequence_length-1)[:, None], dtype=dtype)\n        # Make 
each row = [discount, ..., discount, 1, ..., 1]\n        dmat = mask * discount + (1 - mask)\n        dmat = np.cumprod(dmat[:, ::-1], axis=1)[:, ::-1]\n\n    disc_reward = dmat * reward[:, None]\n    disc_reward = mask_sequences(disc_reward, sequence_length, dtype=dtype)\n    #mask = np.asarray(steps < sequence_length[:, None], dtype=dtype)\n    #disc_reward = mask * disc_reward\n\n    return disc_reward\n\ndef _discount_reward_tensor_1d(reward, sequence_length,\n                               discount=1., dtype=None):\n    if sequence_length is None:\n        raise ValueError('sequence_length must not be `None` for 1D reward.')\n\n    batch_size = tf.shape(reward)[0]\n    max_seq_length = tf.reduce_max(sequence_length)\n    dtype = dtype or reward.dtype\n\n    if discount == 1.:\n        dmat = tf.ones(\n            tf.concat([[batch_size], [max_seq_length]], 0), dtype=dtype)\n    else:\n        mask = tf.sequence_mask(sequence_length, dtype=dtype)\n        mask = tf.concat([mask[:, 1:], tf.zeros_like(mask[:, -1:])], axis=1)\n        # Make each row = [discount, ..., discount, 1, ..., 1]\n        dmat = mask * discount + (1 - mask)\n        dmat = tf.cumprod(dmat, axis=1, reverse=True)\n\n    disc_reward = dmat * tf.expand_dims(reward, -1)\n    disc_reward = mask_sequences(\n        disc_reward, sequence_length, dtype=dtype, tensor_rank=2)\n\n    return disc_reward\n\ndef _discount_reward_py_2d(reward, sequence_length=None,\n                           discount=1., dtype=None):\n    if sequence_length is not None:\n        reward = mask_sequences(reward, sequence_length, dtype=dtype)\n\n    dtype = dtype or reward.dtype\n\n    if discount == 1.:\n        disc_reward = np.cumsum(\n            reward[:, ::-1], axis=1, dtype=dtype)[:, ::-1]\n    else:\n        disc_reward = np.copy(reward)\n        for i in range(reward.shape[1]-2, -1, -1):\n            disc_reward[:, i] += disc_reward[:, i+1] * discount\n\n    return disc_reward\n\ndef 
_discount_reward_tensor_2d(reward, sequence_length=None,\n                               discount=1., dtype=None):\n    if sequence_length is not None:\n        reward = mask_sequences(\n            reward, sequence_length, dtype=dtype, tensor_rank=2)\n\n    if discount == 1.:\n        disc_reward = tf.cumsum(reward, axis=1, reverse=True)\n    else:\n        # [max_time, batch_size]\n        rev_reward_T = tf.transpose(tf.reverse(reward, [1]), [1, 0])\n        rev_reward_T_cum = tf.scan(\n            fn=lambda acc, cur: cur + discount * acc,\n            elems=rev_reward_T,\n            initializer=tf.zeros_like(reward[:, 1]),\n            back_prop=False)\n        disc_reward = tf.reverse(\n            tf.transpose(rev_reward_T_cum, [1, 0]), [1])\n\n    return disc_reward\n"
  },
  {
    "path": "texar_repo/texar/losses/rewards_test.py",
    "content": "\"\"\"\nUnit tests for RL rewards.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\n# pylint: disable=invalid-name, no-member\n\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.losses.rewards import \\\n        _discount_reward_tensor_2d, _discount_reward_tensor_1d, \\\n        _discount_reward_py_1d, _discount_reward_py_2d, \\\n        discount_reward\n\nclass RewardTest(tf.test.TestCase):\n    \"\"\"Tests reward related functions.\n    \"\"\"\n\n    def test_discount_reward(self):\n        \"\"\"Tests :func:`texar.losses.rewards.discount_reward`\n        \"\"\"\n        # 1D\n        reward = np.ones([2], dtype=np.float64)\n        sequence_length = [3, 5]\n\n        discounted_reward = discount_reward(\n            reward, sequence_length, discount=1.)\n        discounted_reward_n = discount_reward(\n            reward, sequence_length, discount=.1, normalize=True)\n\n        discounted_reward_ = discount_reward(\n            tf.constant(reward, dtype=tf.float64),\n            sequence_length, discount=1.)\n        discounted_reward_n_ = discount_reward(\n            tf.constant(reward, dtype=tf.float64),\n            sequence_length, discount=.1, normalize=True)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n\n            r, r_n = sess.run([discounted_reward_, discounted_reward_n_])\n\n            np.testing.assert_array_almost_equal(\n                discounted_reward, r, decimal=6)\n            np.testing.assert_array_almost_equal(\n                discounted_reward_n, r_n, decimal=6)\n\n        # 2D\n        reward = np.ones([2, 10], dtype=np.float64)\n        sequence_length = [5, 10]\n\n        discounted_reward = discount_reward(\n            reward, sequence_length, discount=1.)\n        discounted_reward_n = discount_reward(\n            reward, 
sequence_length, discount=.1, normalize=True)\n\n        discounted_reward_ = discount_reward(\n            tf.constant(reward, dtype=tf.float64), sequence_length,\n            discount=1., tensor_rank=2)\n        discounted_reward_n_ = discount_reward(\n            tf.constant(reward, dtype=tf.float64), sequence_length,\n            discount=.1, tensor_rank=2, normalize=True)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n\n            r, r_n = sess.run([discounted_reward_, discounted_reward_n_])\n\n            np.testing.assert_array_almost_equal(\n                discounted_reward, r, decimal=6)\n            np.testing.assert_array_almost_equal(\n                discounted_reward_n, r_n, decimal=6)\n\n    def test_discount_reward_py_1d(self):\n        \"\"\"Tests :func:`texar.losses.rewards._discount_reward_py_1d`\n        \"\"\"\n        reward = np.ones([2], dtype=np.float64)\n        sequence_length = [3, 5]\n\n        discounted_reward_1 = _discount_reward_py_1d(\n            reward, sequence_length, discount=1.)\n\n        discounted_reward_2 = _discount_reward_py_1d(\n            reward, sequence_length, discount=.1)\n\n        r = discounted_reward_1\n        for i in range(5):\n            if i < 3:\n                self.assertEqual(r[0, i], 1)\n            else:\n                self.assertEqual(r[0, i], 0)\n            self.assertEqual(r[1, i], 1)\n\n        r = discounted_reward_2\n        for i in range(5):\n            if i < 3:\n                self.assertAlmostEqual(r[0, i], 0.1**(2-i))\n            else:\n                self.assertAlmostEqual(r[0, i], 0)\n            self.assertAlmostEqual(r[1, i], 0.1**(4-i))\n\n    def test_discount_reward_tensor_1d(self):\n        \"\"\"Tests :func:`texar.losses.rewards._discount_reward_tensor_1d`\n        \"\"\"\n        reward = tf.ones([2], dtype=tf.float64)\n        sequence_length = [3, 5]\n\n        discounted_reward_1 = 
_discount_reward_tensor_1d(\n            reward, sequence_length, discount=1.)\n\n        discounted_reward_2 = _discount_reward_tensor_1d(\n            reward, sequence_length, discount=.1)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n\n            r = sess.run(discounted_reward_1)\n            for i in range(5):\n                if i < 3:\n                    self.assertEqual(r[0, i], 1)\n                else:\n                    self.assertEqual(r[0, i], 0)\n                self.assertEqual(r[1, i], 1)\n\n            r = sess.run(discounted_reward_2)\n            for i in range(5):\n                if i < 3:\n                    self.assertAlmostEqual(r[0, i], 0.1**(2-i))\n                else:\n                    self.assertAlmostEqual(r[0, i], 0)\n                self.assertAlmostEqual(r[1, i], 0.1**(4-i))\n\n    def test_discount_reward_py_2d(self):\n        \"\"\"Tests :func:`texar.losses.rewards._discount_reward_py_2d`\n        \"\"\"\n        reward = np.ones([2, 10], dtype=np.float64)\n        sequence_length = [5, 10]\n\n        discounted_reward_1 = _discount_reward_py_2d(\n            reward, sequence_length, discount=1.)\n\n        discounted_reward_2 = _discount_reward_py_2d(\n            reward, sequence_length, discount=.1)\n\n        r = discounted_reward_1\n        for i in range(10):\n            if i < 5:\n                self.assertEqual(r[0, i], 5 - i)\n            else:\n                self.assertEqual(r[0, i], 0)\n            self.assertEqual(r[1, i], 10 - i)\n\n        r = discounted_reward_2\n        for i in range(10):\n            if i < 5:\n                self.assertEqual(r[0, i], int(11111./10**i) / 10**(4-i))\n            else:\n                self.assertEqual(r[0, i], 0)\n            self.assertEqual(r[1, i], int(1111111111./10**i) / 10**(9-i))\n\n    def test_discount_reward_tensor_2d(self):\n        \"\"\"Tests :func:`texar.losses.rewards._discount_reward_tensor_2d`\n  
      \"\"\"\n        reward = tf.ones([2, 10], dtype=tf.float64)\n        sequence_length = [5, 10]\n\n        discounted_reward_1 = _discount_reward_tensor_2d(\n            reward, sequence_length, discount=1.)\n\n        discounted_reward_2 = _discount_reward_tensor_2d(\n            reward, sequence_length, discount=.1)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n\n            r = sess.run(discounted_reward_1)\n            for i in range(10):\n                if i < 5:\n                    self.assertEqual(r[0, i], 5 - i)\n                else:\n                    self.assertEqual(r[0, i], 0)\n                self.assertEqual(r[1, i], 10 - i)\n\n            r = sess.run(discounted_reward_2)\n            for i in range(10):\n                if i < 5:\n                    self.assertEqual(r[0, i], int(11111./10**i) / 10**(4-i))\n                else:\n                    self.assertEqual(r[0, i], 0)\n                self.assertEqual(r[1, i], int(1111111111./10**i) / 10**(9-i))\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/losses/rl_losses.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious RL losses\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.losses.mle_losses import _mask_sequences\n\n\ndef reinforce_loss(sample_fn,\n                   global_reward_fn,\n                   local_reward_fn=None,\n                   num_samples=1):\n    \"\"\"Computes REINFORCE loss with global and local rewards.\n\n    Args:\n        sample_fn: A callable that takes :attr:`num_samples` and returns\n            `(samples, probabilities, sequence_lengths)`, where:\n\n            `samples` is a Tensor of shape `[num_samples, max_sequence_length]`\n            containing the generated samples;\n\n            `probabilities` is a Tensor of shape\n            `[num_samples, max_sequence_length]` containing the probabilities of\n            generating each position of the samples. 
Probabilities beyond the\n            respective sequence lengths are ignored.\n\n            `sequence_lengths` is a Tensor of shape `[num_samples]` containing\n            the length of each samples.\n        global_reward_fn: A callable that takes `(samples, sequence_lengths)`\n            and returns a Tensor of shape `[num_samples]` containing the reward\n            of each of the samples.\n        local_reward_fn (optional): A callable that takes\n            `(samples, sequence_lengths)` and returns a Tensor of shape\n            `[num_samples, max_sequence_length]` containing the local reward\n            at each time step of samples.\n        num_samples (int scalar Tensor): the number of sequences to sample.\n\n    Returns:\n        A scalar Tensor of the REINFORCE loss.\n    \"\"\"\n\n    # shape = [batch, length]\n    sequences, probs, seq_lens = sample_fn(num_samples)\n    batch, _ = tf.shape(sequences)\n    rewards_local = tf.constant(0., dtype=probs.dtype, shape=probs.shape)\n    if local_reward_fn is not None:\n        rewards_local = local_reward_fn(sequences, seq_lens)\n\n    # shape = [batch, ]\n    rewards_global = global_reward_fn(sequences, seq_lens)\n    # add broadcast to rewards_global to match the shape of rewards_local\n    rewards = rewards_local + tf.reshape(rewards_global, [batch, 1])\n\n    eps = 1e-12\n    log_probs = _mask_sequences(tf.log(probs + eps), seq_lens)\n    loss = - tf.reduce_mean(\n        tf.reduce_sum(log_probs * rewards, axis=1) / seq_lens)\n    return loss\n\n\ndef reinforce_loss_with_MCtree(sample_fn,   # pylint: disable=invalid-name\n                               global_reward_fn,\n                               local_reward_fn=None,\n                               num_samples=1):\n    \"\"\"Computes REINFORCE loss with Monte Carlo tree search.\n\n    Args:\n        sample_fn: A callable that takes :attr:`num_samples`, 'given_actions'\n            and returns `(samples, probabilities, sequence_lengths)`, 
where:\n\n            `samples` is a Tensor of shape `[num_samples, max_sequence_length]`\n            containing the generated samples;\n\n            `probabilities` is a Tensor of shape\n            `[num_samples, max_sequence_length]` containing the probabilities of\n            generating each position of the samples. Probabilities beyond the\n            respective sequence lengths are ignored.\n\n            `sequence_lengths` is a Tensor of shape `[num_samples]` containing\n            the length of each samples.\n        global_reward_fn: A callable that takes `(samples, sequence_lengths)`\n            and returns a Tensor of shape `[num_samples]` containing the reward\n            of each of the samples.\n        local_reward_fn (optional): A callable that takes\n            `(samples, sequence_lengths)` and returns a Tensor of shape\n            `[num_samples, max_sequence_length]` containing the local reward\n            at each time step of samples.\n        num_samples (int scalar Tensor): the number of sequences to sample.\n\n    Returns:\n        A scalar Tensor of the REINFORCE loss.\n    \"\"\"\n    raise NotImplementedError\n"
  },
  {
    "path": "texar_repo/texar/models/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library models.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.models.model_base import *\nfrom texar.models.seq2seq import *\n"
  },
  {
    "path": "texar_repo/texar/models/model_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase class for models.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom texar import HParams\n\n# pylint: disable=too-many-arguments\n\n__all__ = [\n    \"ModelBase\"\n]\n\nclass ModelBase(object):\n    \"\"\"Base class inherited by all model classes.\n\n    A model class implements interfaces that are compatible with\n    :tf_main:`TF Estimator <estimator/Estimator>`. In particular,\n    :meth:`_build` implements the\n    :tf_main:`model_fn <estimator/Estimator#__init__>` interface; and\n    :meth:`get_input_fn` is for the :attr:`input_fn` interface.\n\n    .. document private functions\n    .. 
automethod:: _build\n    \"\"\"\n\n    def __init__(self, hparams=None):\n        self._hparams = HParams(hparams, self.default_hparams(),\n                                allow_new_hparam=True)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n        \"\"\"\n        hparams = {\n            \"name\": \"model\"\n        }\n        return hparams\n\n    def __call__(self, features, labels, params, mode, config=None):\n        \"\"\"Used for the :tf_main:`model_fn <estimator/Estimator#__init__>`\n        argument when constructing\n        :tf_main:`tf.estimator.Estimator <estimator/Estimator>`.\n        \"\"\"\n        return self._build(features, labels, params, mode, config=config)\n\n    def _build(self, features, labels, params, mode, config=None):\n        \"\"\"Used for the :tf_main:`model_fn <estimator/Estimator#__init__>`\n        argument when constructing\n        :tf_main:`tf.estimator.Estimator <estimator/Estimator>`.\n        \"\"\"\n        raise NotImplementedError\n\n    def get_input_fn(self, *args, **kwargs):\n        \"\"\"Returns the :attr:`input_fn` function that constructs the input\n        data, used in :tf_main:`tf.estimator.Estimator <estimator/Estimator>`.\n        \"\"\"\n        raise NotImplementedError\n\n    @property\n    def hparams(self):\n        \"\"\"A :class:`~texar.HParams` instance. The hyperparameters\n        of the module.\n        \"\"\"\n        return self._hparams\n\n"
  },
  {
    "path": "texar_repo/texar/models/seq2seq/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library seq2seq models.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.models.seq2seq.seq2seq_base import *\nfrom texar.models.seq2seq.basic_seq2seq import *\n"
  },
  {
    "path": "texar_repo/texar/models/seq2seq/basic_seq2seq.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nThe basic seq2seq model without attention.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.models.seq2seq.seq2seq_base import Seq2seqBase\nfrom texar.modules.decoders.beam_search_decode import beam_search_decode\nfrom texar.utils import utils\nfrom texar.utils.shapes import get_batch_size\n\n# pylint: disable=protected-access, too-many-arguments, unused-argument\n\n__all__ = [\n    \"BasicSeq2seq\"\n]\n\nclass BasicSeq2seq(Seq2seqBase):\n    \"\"\"The basic seq2seq model (without attention).\n\n    Example:\n\n        .. code-block:: python\n\n            model = BasicSeq2seq(data_hparams, model_hparams)\n            exor = tx.run.Executor(\n                model=model,\n                data_hparams=data_hparams,\n                config=run_config)\n            exor.train_and_evaluate(\n                max_train_steps=10000,\n                eval_steps=100)\n\n    .. document private functions\n    .. 
automethod:: _build\n    \"\"\"\n\n    def __init__(self, data_hparams, hparams=None):\n        Seq2seqBase.__init__(self, data_hparams, hparams=hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        Same as :meth:`~texar.models.Seq2seqBase.default_hparams` of\n        :class:`~texar.models.Seq2seqBase`.\n        \"\"\"\n        hparams = Seq2seqBase.default_hparams()\n        hparams.update({\n            \"name\": \"basic_seq2seq\"\n        })\n        return hparams\n\n    def _build_decoder(self):\n        kwargs = {\n            \"vocab_size\": self._tgt_vocab.size,\n            \"hparams\": self._hparams.decoder_hparams.todict()\n        }\n        self._decoder = utils.check_or_get_instance(\n            self._hparams.decoder, kwargs,\n            [\"texar.modules\", \"texar.custom\"])\n\n    def _get_predictions(self, decoder_results, features, labels, loss=None):\n        preds = {}\n\n        preds.update(features)\n\n        if labels is not None:\n            preds.update(labels)\n\n        preds.update(utils.flatten_dict({'decode': decoder_results}))\n        preds['decode.outputs.sample'] = self._tgt_vocab.map_ids_to_tokens(\n            preds['decode.outputs.sample_id'])\n\n        if loss is not None:\n            preds['loss'] = loss\n\n        return preds\n\n    def embed_source(self, features, labels, mode):\n        \"\"\"Embeds the inputs.\n        \"\"\"\n        return self._src_embedder(ids=features[\"source_text_ids\"], mode=mode)\n\n    def embed_target(self, features, labels, mode):\n        \"\"\"Embeds the target inputs. 
Used in training.\n        \"\"\"\n        return self._tgt_embedder(ids=labels[\"target_text_ids\"], mode=mode)\n\n    def encode(self, features, labels, mode):\n        \"\"\"Encodes the inputs.\n        \"\"\"\n        embedded_source = self.embed_source(features, labels, mode)\n\n        outputs, final_state = self._encoder(\n            embedded_source,\n            sequence_length=features[\"source_length\"],\n            mode=mode)\n\n        return {'outputs': outputs, 'final_state': final_state}\n\n    def _connect(self, encoder_results, features, labels, mode):\n        \"\"\"Transforms encoder final state into decoder initial state.\n        \"\"\"\n        enc_state = encoder_results[\"final_state\"]\n        possible_kwargs = {\n            \"inputs\": enc_state,\n            \"batch_size\": get_batch_size(enc_state)\n        }\n        outputs = utils.call_function_with_redundant_kwargs(\n            self._connector._build, possible_kwargs)\n        return outputs\n\n    def _decode_train(self, initial_state, encoder_results, features,\n                      labels, mode):\n        return self._decoder(\n            initial_state=initial_state,\n            decoding_strategy=self._hparams.decoding_strategy_train,\n            inputs=self.embed_target(features, labels, mode),\n            sequence_length=labels['target_length']-1,\n            mode=mode)\n\n    def _decode_infer(self, initial_state, encoder_results, features,\n                      labels, mode):\n        start_token = self._tgt_vocab.bos_token_id\n        start_tokens = tf.ones_like(features['source_length']) * start_token\n\n        max_l = self._decoder.hparams.max_decoding_length_infer\n\n        if self._hparams.beam_search_width > 1:\n            return beam_search_decode(\n                decoder_or_cell=self._decoder,\n                embedding=self._tgt_embedder.embedding,\n                start_tokens=start_tokens,\n                end_token=self._tgt_vocab.eos_token_id,\n    
            beam_width=self._hparams.beam_search_width,\n                initial_state=initial_state,\n                max_decoding_length=max_l)\n        else:\n            return self._decoder(\n                initial_state=initial_state,\n                decoding_strategy=self._hparams.decoding_strategy_infer,\n                embedding=self._tgt_embedder.embedding,\n                start_tokens=start_tokens,\n                end_token=self._tgt_vocab.eos_token_id,\n                mode=mode)\n\n    def decode(self, encoder_results, features, labels, mode):\n        \"\"\"Decodes.\n        \"\"\"\n        initial_state = self._connect(encoder_results, features, labels, mode)\n\n        if mode == tf.estimator.ModeKeys.PREDICT:\n            outputs, final_state, sequence_length = self._decode_infer(\n                initial_state, encoder_results, features, labels, mode)\n        else:\n            outputs, final_state, sequence_length = self._decode_train(\n                initial_state, encoder_results, features, labels, mode)\n\n        return {'outputs': outputs,\n                'final_state': final_state,\n                'sequence_length': sequence_length}\n\n"
  },
  {
    "path": "texar_repo/texar/models/seq2seq/seq2seq_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase class for seq2seq models.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.models.model_base import ModelBase\nfrom texar.losses.mle_losses import sequence_sparse_softmax_cross_entropy\nfrom texar.data.data.paired_text_data import PairedTextData\nfrom texar.core.optimization import get_train_op\nfrom texar import HParams\nfrom texar.utils import utils\nfrom texar.utils.variables import collect_trainable_variables\n\n# pylint: disable=too-many-instance-attributes, unused-argument,\n# pylint: disable=too-many-arguments, no-self-use\n\n__all__ = [\n    \"Seq2seqBase\"\n]\n\nclass Seq2seqBase(ModelBase):\n    \"\"\"Base class inherited by all seq2seq model classes.\n\n    .. document private functions\n    .. 
automethod:: _build\n    \"\"\"\n\n    def __init__(self, data_hparams, hparams=None):\n        ModelBase.__init__(self, hparams)\n\n        self._data_hparams = HParams(data_hparams,\n                                     PairedTextData.default_hparams())\n\n        self._src_vocab = None\n        self._tgt_vocab = None\n        self._src_embedder = None\n        self._tgt_embedder = None\n        self._connector = None\n        self._encoder = None\n        self._decoder = None\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"source_embedder\": \"WordEmbedder\",\n                \"source_embedder_hparams\": {},\n                \"target_embedder\": \"WordEmbedder\",\n                \"target_embedder_hparams\": {},\n                \"embedder_share\": True,\n                \"embedder_hparams_share\": True,\n                \"encoder\": \"UnidirectionalRNNEncoder\",\n                \"encoder_hparams\": {},\n                \"decoder\": \"BasicRNNDecoder\",\n                \"decoder_hparams\": {},\n                \"decoding_strategy_train\": \"train_greedy\",\n                \"decoding_strategy_infer\": \"infer_greedy\",\n                \"beam_search_width\": 0,\n                \"connector\": \"MLPTransformConnector\",\n                \"connector_hparams\": {},\n                \"optimization\": {},\n                \"name\": \"seq2seq\",\n            }\n\n        Here:\n\n        \"source_embedder\" : str or class or instance\n            Word embedder for source text. Can be a class, its name or module\n            path, or a class instance.\n\n        \"source_embedder_hparams\" : dict\n            Hyperparameters for constructing the source embedder. E.g.,\n            See :meth:`~texar.modules.WordEmbedder.default_hparams` for\n            hyperparameters of :class:`~texar.modules.WordEmbedder`. 
Ignored\n            if \"source_embedder\" is an instance.\n\n        \"target_embedder\", \"target_embedder_hparams\" :\n            Same as \"source_embedder\" and \"source_embedder_hparams\" but for\n            target text embedder.\n\n        \"embedder_share\" : bool\n            Whether to share the source and target embedder. If `True`,\n            source embedder will be used to embed target text.\n\n        \"embedder_hparams_share\" : bool\n            Whether to share the embedder configurations. If `True`,\n            target embedder will be created with \"source_embedder_hparams\".\n            But the two embedders have different set of trainable variables.\n\n        \"encoder\", \"encoder_hparams\" :\n            Same as \"source_embedder\" and \"source_embedder_hparams\" but for\n            encoder.\n\n        \"decoder\", \"decoder_hparams\" :\n            Same as \"source_embedder\" and \"source_embedder_hparams\" but for\n            decoder.\n\n        \"decoding_strategy_train\" : str\n            The decoding strategy in training mode. See\n            :meth:`~texar.modules.RNNDecoderBase._build` for details.\n\n        \"decoding_strategy_infer\" : str\n            The decoding strategy in eval/inference mode.\n\n        \"beam_search_width\" : int\n            Beam width. If > 1, beam search is used in eval/inference mode.\n\n        \"connector\", \"connector_hparams\" :\n            The connector class and hyperparameters. A connector transforms\n            an encoder final state to a decoder initial state.\n\n        \"optimization\" : dict\n            Hyperparameters of optimizating the model. 
See\n            :func:`~texar.core.default_optimization_hparams` for details.\n\n        \"name\" : str\n            Name of the model.\n        \"\"\"\n        hparams = ModelBase.default_hparams()\n        hparams.update({\n            \"name\": \"seq2seq\",\n            \"source_embedder\": \"WordEmbedder\",\n            \"source_embedder_hparams\": {},\n            \"target_embedder\": \"WordEmbedder\",\n            \"target_embedder_hparams\": {},\n            \"embedder_share\": True,\n            \"embedder_hparams_share\": True,\n            \"encoder\": \"UnidirectionalRNNEncoder\",\n            \"encoder_hparams\": {},\n            \"decoder\": \"BasicRNNDecoder\",\n            \"decoder_hparams\": {},\n            \"decoding_strategy_train\": \"train_greedy\",\n            \"decoding_strategy_infer\": \"infer_greedy\",\n            \"beam_search_width\": 0,\n            \"connector\": \"MLPTransformConnector\",\n            \"connector_hparams\": {},\n            \"optimization\": {}\n        })\n        return hparams\n\n    def _build_vocab(self):\n        self._src_vocab, self._tgt_vocab = PairedTextData.make_vocab(\n            self._data_hparams.source_dataset,\n            self._data_hparams.target_dataset)\n\n    def _build_embedders(self):\n        kwargs = {\n            \"vocab_size\": self._src_vocab.size,\n            \"hparams\": self._hparams.source_embedder_hparams.todict()\n        }\n        self._src_embedder = utils.check_or_get_instance(\n            self._hparams.source_embedder, kwargs,\n            [\"texar.modules\", \"texar.custom\"])\n\n        if self._hparams.embedder_share:\n            self._tgt_embedder = self._src_embedder\n        else:\n            kwargs = {\n                \"vocab_size\": self._tgt_vocab.size,\n            }\n            if self._hparams.embedder_hparams_share:\n                kwargs[\"hparams\"] = \\\n                        self._hparams.source_embedder_hparams.todict()\n            else:\n        
        kwargs[\"hparams\"] = \\\n                        self._hparams.target_embedder_hparams.todict()\n            self._tgt_embedder = utils.check_or_get_instance(\n                self._hparams.target_embedder, kwargs,\n                [\"texar.modules\", \"texar.custom\"])\n\n    def _build_encoder(self):\n        kwargs = {\n            \"hparams\": self._hparams.encoder_hparams.todict()\n        }\n        self._encoder = utils.check_or_get_instance(\n            self._hparams.encoder, kwargs,\n            [\"texar.modules\", \"texar.custom\"])\n\n    def _build_decoder(self):\n        raise NotImplementedError\n\n    def _build_connector(self):\n        kwargs = {\n            \"output_size\": self._decoder.state_size,\n            \"hparams\": self._hparams.connector_hparams.todict()\n        }\n        self._connector = utils.check_or_get_instance(\n            self._hparams.connector, kwargs,\n            [\"texar.modules\", \"texar.custom\"])\n\n    def get_loss(self, decoder_results, features, labels):\n        \"\"\"Computes the training loss.\n        \"\"\"\n        return sequence_sparse_softmax_cross_entropy(\n            labels=labels['target_text_ids'][:, 1:],\n            logits=decoder_results['outputs'].logits,\n            sequence_length=decoder_results['sequence_length'])\n\n    def _get_predictions(self, decoder_results, features, labels, loss=None):\n        raise NotImplementedError\n\n    def _get_train_op(self, loss):\n        varlist = collect_trainable_variables(\n            [self._src_embedder, self._tgt_embedder, self._encoder,\n             self._connector, self._decoder])\n        return get_train_op(\n            loss, variables=varlist, hparams=self._hparams.optimization)\n\n    def _get_eval_metric_ops(self, decoder_results, features, labels):\n        return None\n\n    def embed_source(self, features, labels, mode):\n        \"\"\"Embeds the inputs.\n        \"\"\"\n        raise NotImplementedError\n\n    def 
embed_target(self, features, labels, mode):\n        \"\"\"Embeds the target inputs. Used in training.\n        \"\"\"\n        raise NotImplementedError\n\n    def encode(self, features, labels, mode):\n        \"\"\"Encodes the inputs.\n        \"\"\"\n        raise NotImplementedError\n\n    def _connect(self, encoder_results, features, labels, mode):\n        \"\"\"Transforms encoder final state into decoder initial state.\n        \"\"\"\n        raise NotImplementedError\n\n    def decode(self, encoder_results, features, labels, mode):\n        \"\"\"Decodes.\n        \"\"\"\n        raise NotImplementedError\n\n    def _build(self, features, labels, params, mode, config=None):\n        self._build_vocab()\n        self._build_embedders()\n        self._build_encoder()\n        self._build_decoder()\n        self._build_connector()\n\n        encoder_results = self.encode(features, labels, mode)\n        decoder_results = self.decode(encoder_results, features, labels, mode)\n\n        loss, train_op, preds, eval_metric_ops = None, None, None, None\n        if mode == tf.estimator.ModeKeys.PREDICT:\n            preds = self._get_predictions(decoder_results, features, labels)\n        else:\n            loss = self.get_loss(decoder_results, features, labels)\n\n            if mode == tf.estimator.ModeKeys.TRAIN:\n                train_op = self._get_train_op(loss)\n            if mode == tf.estimator.ModeKeys.EVAL:\n                eval_metric_ops = self._get_eval_metric_ops(\n                    decoder_results, features, labels)\n\n            preds = self._get_predictions(decoder_results, features, labels,\n                                          loss)\n\n        return tf.estimator.EstimatorSpec(\n            mode=mode,\n            predictions=preds,\n            loss=loss,\n            train_op=train_op,\n            eval_metric_ops=eval_metric_ops)\n\n    def get_input_fn(self, mode, hparams=None): #pylint:disable=arguments-differ\n        
\"\"\"Creates an input function `input_fn` that provides input data\n        for the model in an :tf_main:`Estimator <estimator/Estimator>`.\n        See, e.g., :tf_main:`tf.estimator.train_and_evaluate\n        <estimator/train_and_evaluate>`.\n\n        Args:\n            mode: One of members in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`.\n            hparams: A `dict` or an :class:`~texar.HParams` instance\n                containing the hyperparameters of\n                :class:`~texar.data.PairedTextData`. See\n                :meth:`~texar.data.PairedTextData.default_hparams` for the\n                the structure and default values of the hyperparameters.\n\n        Returns:\n            An input function that returns a tuple `(features, labels)`\n            when called. `features` contains data fields that are related\n            to source text, and `labels` contains data fields related\n            to target text. See :class:`~texar.data.PairedTextData` for\n            all data fields.\n        \"\"\"\n        def _input_fn():\n            data = PairedTextData(hparams)\n\n            iterator = data.dataset.make_initializable_iterator()\n            tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS,\n                                 iterator.initializer)\n\n            batch = iterator.get_next()\n\n            features, labels = {}, {}\n            for key, value in batch.items():\n                if key.startswith('source_'):\n                    features[key] = value\n                else:\n                    labels[key] = value\n            return features, labels\n\n        return _input_fn\n"
  },
  {
    "path": "texar_repo/texar/module_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase class for modules.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport re\n\nimport tensorflow as tf\n\nfrom texar.utils.exceptions import TexarError\nfrom texar.hyperparams import HParams\n\n__all__ = [\n    \"ModuleBase\"\n]\nclass ModuleBase(object):\n    \"\"\"Base class inherited by modules that create Variables and are\n    configurable through hyperparameters.\n\n    A Texar module inheriting :class:`~texar.ModuleBase` has following key\n    features:\n\n        - **Convenient variable re-use**: A module instance creates \\\n        its own sets of variables, and automatically re-uses its variables on \\\n        subsequent calls. Hence TF variable/name scope is \\\n        transparent to users. For example:\n\n            .. code-block:: python\n\n                encoder = UnidirectionalRNNEncoder(hparams) # create instance\n                output_1 = encoder(inputs_1) # variables are created\n                output_2 = encoder(inputs_2) # variables are re-used\n\n                print(encoder.trainable_variables) # access trainable variables\n                # [ ... ]\n\n        - **Configurable through hyperparameters**: Each module defines \\\n        allowed hyperparameters and default values. 
Hyperparameters not \\\n        specified by users will take default values.\n\n        - **Callable**: As the above example, a module instance is \"called\" \\\n        with input tensors and returns output tensors. Every call of a module \\\n        will add ops to the Graph to perform the module's logic.\n\n    Args:\n        hparams (dict, optional): Hyperparameters of the module. See\n            :meth:`default_hparams` for the structure and default values.\n\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n\n    def __init__(self, hparams=None):\n        self._hparams = HParams(hparams, self.default_hparams())\n        self._template = tf.make_template(self._hparams.name, self._build,\n                                          create_scope_now_=True)\n        self._unique_name = self.variable_scope.name.split(\"/\")[-1]\n        self._trainable_variables = []\n        self._built = False\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a `dict` of hyperparameters of the module with default\n        values. Used to replace the missing values of input `hparams`\n        during module construction.\n\n        .. 
code-block:: python\n\n            {\n                \"name\": \"module\"\n            }\n        \"\"\"\n        return {\n            \"name\": \"module\"\n        }\n\n    def _build(self, *args, **kwargs):\n        \"\"\"Subclass must implement this method to build the logic.\n\n        Args:\n            *args: Arguments.\n            **kwargs: Keyword arguments.\n\n        Returns:\n            Output Tensor(s).\n        \"\"\"\n        raise NotImplementedError\n\n    def __call__(self, *args, **kwargs):\n        \"\"\"Executes the module logic defined in _build method\n\n        Args:\n            *args: Arguments of _build method.\n            **kwargs: Keyword arguments of _build method.\n\n        Returns:\n            The output of _build method.\n        \"\"\"\n        return self._template(*args, **kwargs)\n\n    def _add_internal_trainable_variables(self):  # pylint: disable=invalid-name\n        \"\"\"Collects trainable variables constructured internally in this module.\n\n        This is typically called at the end of `_build()` where all necessary\n        trainable variables have been constructed.\n        \"\"\"\n        scope_name = self.variable_scope.name\n        # Escape to handle possible \".\" characters in the name.\n        # Append a slash to the end to avoid searching scopes that have this\n        # scope name as a prefix.\n        scope_name = re.escape(scope_name) + \"/\"\n        internal_trainable_variables = tf.get_collection(\n            tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope_name)\n        self._add_trainable_variable(internal_trainable_variables)\n\n    def _add_trainable_variable(self, variable):\n        \"\"\"Adds a trainable variable to the trainable variable list of the\n        module.\n\n        Args:\n            variable: a (list of) trainable variable(s) constructed either\n                internally in the module or constructured outside but used\n                inside the module.\n        \"\"\"\n       
 if isinstance(variable, (list, tuple)):\n            for var in variable:\n                self._add_trainable_variable(var)\n        else:\n            if variable not in self._trainable_variables:\n                self._trainable_variables.append(variable)\n\n    @property\n    def variable_scope(self):\n        \"\"\"The variable scope of the module.\n        \"\"\"\n        return self._template.variable_scope\n\n    @property\n    def name(self):\n        \"\"\"The uniquified name of the module.\n        \"\"\"\n        return self._unique_name\n\n    @property\n    def trainable_variables(self):\n        \"\"\"The list of trainable variables of the module.\n        \"\"\"\n        if not self._built:\n            raise TexarError(\n                \"Attempting to access trainable_variables before module %s \"\n                \"was fully built. The module is built once it is called, \"\n                \"e.g., with `%s(...)`\" % (self.name, self.name))\n        return self._trainable_variables\n\n    @property\n    def hparams(self):\n        \"\"\"An :class:`~texar.HParams` instance. The hyperparameters\n        of the module.\n        \"\"\"\n        return self._hparams\n"
  },
  {
    "path": "texar_repo/texar/modules/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library module.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.modules.networks import *\nfrom texar.modules.embedders import *\nfrom texar.modules.encoders import *\nfrom texar.modules.decoders import *\nfrom texar.modules.connectors import *\nfrom texar.modules.classifiers import *\nfrom texar.modules.policies import *\nfrom texar.modules.qnets import *\nfrom texar.modules.memory import *\n"
  },
  {
    "path": "texar_repo/texar/modules/classifiers/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library classifiers.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.modules.classifiers.conv_classifiers import *\nfrom texar.modules.classifiers.rnn_classifiers import *\n"
  },
  {
    "path": "texar_repo/texar/modules/classifiers/classifier_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase class for encoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom texar.module_base import ModuleBase\n\n__all__ = [\n    \"ClassifierBase\"\n]\n\nclass ClassifierBase(ModuleBase):\n    \"\"\"Base class inherited by all classifier classes.\n    \"\"\"\n\n    def __init__(self, hparams=None):\n        ModuleBase.__init__(self, hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n        \"\"\"\n        return {\n            \"name\": \"classifier\"\n        }\n\n    def _build(self, inputs, *args, **kwargs):\n        \"\"\"Classifies the inputs.\n\n        Args:\n          inputs: Inputs to the classifier.\n          *args: Other arguments.\n          **kwargs: Keyword arguments.\n\n        Returns:\n          Classification results.\n        \"\"\"\n        raise NotImplementedError\n\n"
  },
  {
    "path": "texar_repo/texar/modules/classifiers/conv_classifiers.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious classifier classes.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=not-context-manager, too-many-arguments, too-many-locals\n\nimport tensorflow as tf\n\nfrom texar.utils.exceptions import TexarError\nfrom texar.modules.classifiers.classifier_base import ClassifierBase\nfrom texar.modules.encoders.conv_encoders import Conv1DEncoder\nfrom texar.utils import utils\nfrom texar.hyperparams import HParams\n\n__all__ = [\n    \"Conv1DClassifier\"\n]\n\nclass Conv1DClassifier(ClassifierBase):\n    \"\"\"Simple Conv-1D classifier.\n    This is a combination of the\n    :class:`~texar.modules.Conv1DEncoder` with a classification layer.\n\n    Args:\n        hparams (dict, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    Example:\n\n        .. code-block:: python\n\n            clas = Conv1DClassifier(hparams={'num_classes': 10})\n\n            inputs = tf.random_uniform([64, 20, 256])\n            logits, pred = clas(inputs)\n            # logits == Tensor of shape [64, 10]\n            # pred   == Tensor of shape [64]\n\n    .. document private functions\n    .. 
automethod:: _build\n    \"\"\"\n\n    def __init__(self, hparams=None):\n        ClassifierBase.__init__(self, hparams)\n\n        with tf.variable_scope(self.variable_scope):\n            encoder_hparams = utils.dict_fetch(\n                hparams, Conv1DEncoder.default_hparams())\n            self._encoder = Conv1DEncoder(hparams=encoder_hparams)\n\n            # Add an additional dense layer if needed\n            self._num_classes = self._hparams.num_classes\n            if self._num_classes > 0:\n                if self._hparams.num_dense_layers <= 0:\n                    self._encoder.append_layer({\"type\": \"Flatten\"})\n\n                logit_kwargs = self._hparams.logit_layer_kwargs\n                if logit_kwargs is None:\n                    logit_kwargs = {}\n                elif not isinstance(logit_kwargs, HParams):\n                    raise ValueError(\n                        \"hparams['logit_layer_kwargs'] must be a dict.\")\n                else:\n                    logit_kwargs = logit_kwargs.todict()\n                logit_kwargs.update({\"units\": self._num_classes})\n                if 'name' not in logit_kwargs:\n                    logit_kwargs['name'] = \"logit_layer\"\n\n                self._encoder.append_layer(\n                    {\"type\": \"Dense\", \"kwargs\": logit_kwargs})\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                # (1) Same hyperparameters as in Conv1DEncoder\n                ...\n\n                # (2) Additional hyperparameters\n                \"num_classes\": 2,\n                \"logit_layer_kwargs\": {\n                    \"use_bias\": False\n                },\n                \"name\": \"conv1d_classifier\"\n            }\n\n        Here:\n\n        1. 
Same hyperparameters as in :class:`~texar.modules.Conv1DEncoder`.\n        See the :meth:`~texar.modules.Conv1DEncoder.default_hparams`.\n        An instance of Conv1DEncoder is created for feature extraction.\n\n        2. Additional hyperparameters:\n\n            \"num_classes\" : int\n                Number of classes:\n\n                - If **`> 0`**, an additional :tf_main:`Dense <layers/Dense>` \\\n                layer is appended to the encoder to compute the logits over \\\n                classes.\n                - If **`<= 0`**, no dense layer is appended. The number of \\\n                classes is assumed to be the final dense layer size of the \\\n                encoder.\n\n            \"logit_layer_kwargs\" : dict\n                Keyword arguments for the logit Dense layer constructor,\n                except for argument \"units\" which is set to \"num_classes\".\n                Ignored if no extra logit layer is appended.\n\n            \"name\" : str\n                Name of the classifier.\n        \"\"\"\n        hparams = Conv1DEncoder.default_hparams()\n        hparams.update({\n            \"name\": \"conv1d_classifier\",\n            \"num_classes\": 2, #set to <=0 to avoid appending output layer\n            \"logit_layer_kwargs\": {\"use_bias\": False}\n        })\n        return hparams\n\n    def _build(self,    # pylint: disable=arguments-differ\n               inputs,\n               sequence_length=None,\n               dtype=None,\n               mode=None):\n        \"\"\"Feeds the inputs through the network and makes classification.\n\n        The arguments are the same as in :class:`~texar.modules.Conv1DEncoder`.\n\n        The predictions of binary classification (\"num_classes\"=1) and\n        multi-way classification (\"num_classes\">1) are different, as explained\n        below.\n\n        Args:\n            inputs: The inputs to the network, which is a 3D tensor. 
See\n                :class:`~texar.modules.Conv1DEncoder` for more details.\n            sequence_length (optional): An int tensor of shape `[batch_size]`\n                containing the length of each element in :attr:`inputs`.\n                If given, time steps beyond the length will first be masked out\n                before feeding to the layers.\n            dtype (optional): Type of the inputs. If not provided, infers\n                from inputs automatically.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL`, and `PREDICT`. If `None`,\n                :func:`texar.global_mode` is used.\n\n        Returns:\n            A tuple `(logits, pred)`, where\n\n            - **`logits`** is a Tensor of shape `[batch_size, num_classes]`\\\n            for `num_classes` >1, and `[batch_size]` for `num_classes` =1 \\\n            (i.e., binary classification).\n            - **`pred`** is the prediction, a Tensor of shape `[batch_size]` \\\n            and type `tf.int64`. 
For binary classification, the standard \\\n            sigmoid function is used for prediction, and the class labels are \\\n            `{0, 1}`.\n        \"\"\"\n        logits = self._encoder(inputs, sequence_length, dtype, mode)\n\n        num_classes = self._hparams.num_classes\n        is_binary = num_classes == 1\n        is_binary = is_binary or (num_classes <= 0 and logits.shape[1] == 1)\n\n        if is_binary:\n            pred = tf.greater(logits, 0)\n            logits = tf.reshape(logits, [-1])\n        else:\n            pred = tf.argmax(logits, 1)\n        pred = tf.to_int64(tf.reshape(pred, [-1]))\n\n        self._built = True\n\n        return logits, pred\n\n    @property\n    def trainable_variables(self):\n        \"\"\"The list of trainable variables of the module.\n        \"\"\"\n        if not self._built:\n            raise TexarError(\n                \"Attempting to access trainable_variables before module %s \"\n                \"was fully built. The module is built once it is called, \"\n                \"e.g., with `%s(...)`\" % (self.name, self.name))\n        return self._encoder.trainable_variables\n\n    @property\n    def num_classes(self):\n        \"\"\"The number of classes.\n        \"\"\"\n        return self._num_classes\n\n    @property\n    def nn(self): # pylint: disable=invalid-name\n        \"\"\"The classifier neural network.\n        \"\"\"\n        return self._encoder\n\n    def has_layer(self, layer_name):\n        \"\"\"Returns `True` if the network with the name exists. Returns `False`\n        otherwise.\n\n        Args:\n            layer_name (str): Name of the layer.\n        \"\"\"\n        return self._encoder.has_layer(layer_name)\n\n    def layer_by_name(self, layer_name):\n        \"\"\"Returns the layer with the name. 
Returns 'None' if the layer name\n        does not exist.\n\n        Args:\n            layer_name (str): Name of the layer.\n        \"\"\"\n        return self._encoder.layer_by_name(layer_name)\n\n    @property\n    def layers_by_name(self):\n        \"\"\"A dictionary mapping layer names to the layers.\n        \"\"\"\n        return self._encoder.layers_by_name\n\n    @property\n    def layers(self):\n        \"\"\"A list of the layers.\n        \"\"\"\n        return self._encoder.layers\n\n    @property\n    def layer_names(self):\n        \"\"\"A list of uniquified layer names.\n        \"\"\"\n        return self._encoder.layer_names\n\n    def layer_outputs_by_name(self, layer_name):\n        \"\"\"Returns the output tensors of the layer with the specified name.\n        Returns `None` if the layer name does not exist.\n\n        Args:\n            layer_name (str): Name of the layer.\n        \"\"\"\n        return self._encoder.layer_outputs_by_name(layer_name)\n\n    @property\n    def layer_outputs(self):\n        \"\"\"A list containing output tensors of each layer.\n        \"\"\"\n        return self._encoder.layer_outputs\n"
  },
  {
    "path": "texar_repo/texar/modules/classifiers/conv_classifiers_test.py",
    "content": "#\n\"\"\"\nUnit tests for conv encoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nimport texar as tx\nfrom texar.modules.classifiers.conv_classifiers import Conv1DClassifier\n\n\nclass Conv1DClassifierTest(tf.test.TestCase):\n    \"\"\"Tests :class:`~texar.modules.Conv1DClassifier` class.\n    \"\"\"\n\n    def test_classifier(self):\n        \"\"\"Tests classification.\n        \"\"\"\n        # case 1: default hparams\n        classifier = Conv1DClassifier()\n        self.assertEqual(len(classifier.layers), 5)\n        self.assertTrue(isinstance(classifier.layers[-1],\n                                   tf.layers.Dense))\n        inputs = tf.ones([64, 16, 300], tf.float32)\n        logits, pred = classifier(inputs)\n        self.assertEqual(logits.shape, [64, 2])\n        self.assertEqual(pred.shape, [64])\n\n        inputs = tf.placeholder(tf.float32, [64, None, 300])\n        logits, pred = classifier(inputs)\n        self.assertEqual(logits.shape, [64, 2])\n        self.assertEqual(pred.shape, [64])\n\n        # case 1\n        hparams = {\n            \"num_classes\": 10,\n            \"logit_layer_kwargs\": {\"use_bias\": False}\n        }\n        classifier = Conv1DClassifier(hparams=hparams)\n        inputs = tf.ones([64, 16, 300], tf.float32)\n        logits, pred = classifier(inputs)\n        self.assertEqual(logits.shape, [64, 10])\n        self.assertEqual(pred.shape, [64])\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/classifiers/rnn_classifiers.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious RNN classifiers.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\n\nimport tensorflow as tf\nfrom tensorflow.contrib.framework import nest\n\nfrom texar.modules.classifiers.classifier_base import ClassifierBase\nfrom texar.modules.encoders.rnn_encoders import \\\n        UnidirectionalRNNEncoder, _forward_single_output_layer\nfrom texar.core import layers\nfrom texar.utils import utils, shapes\nfrom texar.hyperparams import HParams\n\n# pylint: disable=too-many-arguments, invalid-name, no-member,\n# pylint: disable=too-many-branches, too-many-locals, too-many-statements\n\n__all__ = [\n    \"UnidirectionalRNNClassifier\"\n]\n\n#def RNNClassifierBase(ClassifierBase):\n#    \"\"\"Base class inherited by all RNN classifiers.\n#    \"\"\"\n#\n#    def __init__(self, hparams=None):\n#        ClassifierBase.__init__(self, hparams)\n\n\nclass UnidirectionalRNNClassifier(ClassifierBase):\n    \"\"\"One directional RNN classifier.\n    This is a combination of the\n    :class:`~texar.modules.UnidirectionalRNNEncoder` with a classification\n    layer. 
Both step-wise classification and sequence-level classification\n    are supported, specified in :attr:`hparams`.\n\n    Arguments are the same as in\n    :class:`~texar.modules.UnidirectionalRNNEncoder`.\n\n    Args:\n        cell: (RNNCell, optional) If not specified,\n            a cell is created as specified in :attr:`hparams[\"rnn_cell\"]`.\n        cell_dropout_mode (optional): A Tensor taking value of\n            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, which\n            toggles dropout in the RNN cell (e.g., activates dropout in\n            TRAIN mode). If `None`, :func:`~texar.global_mode` is used.\n            Ignored if :attr:`cell` is given.\n        output_layer (optional): An instance of\n            :tf_main:`tf.layers.Layer <layers/Layer>`. Applies to the RNN cell\n            output of each step. If `None` (default), the output layer is\n            created as specified in :attr:`hparams[\"output_layer\"]`.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    .. document private functions\n    .. 
automethod:: _build\n    \"\"\"\n\n    def __init__(self,\n                 cell=None,\n                 cell_dropout_mode=None,\n                 output_layer=None,\n                 hparams=None):\n        ClassifierBase.__init__(self, hparams)\n\n        with tf.variable_scope(self.variable_scope):\n            # Creates the underlying encoder\n            encoder_hparams = utils.dict_fetch(\n                hparams, UnidirectionalRNNEncoder.default_hparams())\n            if encoder_hparams is not None:\n                encoder_hparams['name'] = None\n            self._encoder = UnidirectionalRNNEncoder(\n                cell=cell,\n                cell_dropout_mode=cell_dropout_mode,\n                output_layer=output_layer,\n                hparams=encoder_hparams)\n\n            # Creates an additional classification layer if needed\n            self._num_classes = self._hparams.num_classes\n            if self._num_classes <= 0:\n                self._logit_layer = None\n            else:\n                logit_kwargs = self._hparams.logit_layer_kwargs\n                if logit_kwargs is None:\n                    logit_kwargs = {}\n                elif not isinstance(logit_kwargs, HParams):\n                    raise ValueError(\n                        \"hparams['logit_layer_kwargs'] must be a dict.\")\n                else:\n                    logit_kwargs = logit_kwargs.todict()\n                logit_kwargs.update({\"units\": self._num_classes})\n                if 'name' not in logit_kwargs:\n                    logit_kwargs['name'] = \"logit_layer\"\n\n                layer_hparams = {\"type\": \"Dense\", \"kwargs\": logit_kwargs}\n                self._logit_layer = layers.get_layer(hparams=layer_hparams)\n\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                # (1) Same hyperparameters as in UnidirectionalRNNEncoder\n                ...\n\n                # (2) Additional hyperparameters\n                \"num_classes\": 2,\n                \"logit_layer_kwargs\": None,\n                \"clas_strategy\": \"final_time\",\n                \"max_seq_length\": None,\n                \"name\": \"unidirectional_rnn_classifier\"\n            }\n\n        Here:\n\n        1. Same hyperparameters as in\n        :class:`~texar.modules.UnidirectionalRNNEncoder`.\n        See the :meth:`~texar.modules.UnidirectionalRNNEncoder.default_hparams`.\n        An instance of UnidirectionalRNNEncoder is created for feature\n        extraction.\n\n        2. Additional hyperparameters:\n\n            \"num_classes\" : int\n                Number of classes:\n\n                - If **`> 0`**, an additional :tf_main:`Dense <layers/Dense>` \\\n                layer is appended to the encoder to compute the logits over \\\n                classes.\n                - If **`<= 0`**, no dense layer is appended. The number of \\\n                classes is assumed to be the final dense layer size of the \\\n                encoder.\n\n            \"logit_layer_kwargs\" : dict\n                Keyword arguments for the logit Dense layer constructor,\n                except for argument \"units\" which is set to \"num_classes\".\n                Ignored if no extra logit layer is appended.\n\n            \"clas_strategy\" : str\n                The classification strategy, one of:\n\n                - **\"final_time\"**: Sequence-leve classification based on \\\n                the output of the final time step. One sequence has one class.\n                - **\"all_time\"**: Sequence-level classification based on \\\n                the output of all time steps. 
One sequence has one class.\n                - **\"time_wise\"**: Step-wise classification, i.e., make \\\n                classification for each time step based on its output.\n\n            \"max_seq_length\" : int, optional\n                Maximum possible length of input sequences. Required if\n                \"clas_strategy\" is \"all_time\".\n\n            \"name\" : str\n                Name of the classifier.\n        \"\"\"\n        hparams = UnidirectionalRNNEncoder.default_hparams()\n        hparams.update({\n            \"num_classes\": 2,\n            \"logit_layer_kwargs\": None,\n            \"clas_strategy\": \"final_time\",\n            \"max_seq_length\": None,\n            \"name\": \"unidirectional_rnn_classifier\"\n        })\n        return hparams\n\n    def _build(self,\n               inputs,\n               sequence_length=None,\n               initial_state=None,\n               time_major=False,\n               mode=None,\n               **kwargs):\n        \"\"\"Feeds the inputs through the network and makes classification.\n\n        The arguments are the same as in\n        :class:`~texar.modules.UnidirectionalRNNEncoder`.\n\n        Args:\n            inputs: A 3D Tensor of shape `[batch_size, max_time, dim]`.\n                The first two dimensions\n                `batch_size` and `max_time` may be exchanged if\n                `time_major=True` is specified.\n            sequence_length (optional): A 1D int tensor of shape `[batch_size]`.\n                Sequence lengths\n                of the batch inputs. Used to copy-through state and zero-out\n                outputs when past a batch element's sequence length.\n            initial_state (optional): Initial state of the RNN.\n            time_major (bool): The shape format of the :attr:`inputs` and\n                :attr:`outputs` Tensors. If `True`, these tensors are of shape\n                `[max_time, batch_size, depth]`. 
If `False` (default),\n                these tensors are of shape `[batch_size, max_time, depth]`.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL`, and `PREDICT`. Controls output layer dropout\n                if the output layer is specified with :attr:`hparams`.\n                If `None` (default), :func:`texar.global_mode()`\n                is used.\n            return_cell_output (bool): Whether to return the output of the RNN\n                cell. This is the results prior to the output layer.\n            **kwargs: Optional keyword arguments of\n                :tf_main:`tf.nn.dynamic_rnn <nn/dynamic_rnn>`,\n                such as `swap_memory`, `dtype`, `parallel_iterations`, etc.\n\n        Returns:\n            A tuple `(logits, pred)`, containing the logits over classes and\n            the predictions, respectively.\n\n            - If \"clas_strategy\"==\"final_time\" or \"all_time\"\n\n                - If \"num_classes\"==1, `logits` and `pred` are of both \\\n                shape `[batch_size]`\n                - If \"num_classes\">1, `logits` is of shape \\\n                `[batch_size, num_classes]` and `pred` is of shape \\\n                `[batch_size]`.\n\n            - If \"clas_strategy\"==\"time_wise\",\n\n                - If \"num_classes\"==1, `logits` and `pred` are of both \\\n                shape `[batch_size, max_time]`\n                - If \"num_classes\">1, `logits` is of shape \\\n                `[batch_size, max_time, num_classes]` and `pred` is of shape \\\n                `[batch_size, max_time]`.\n                - If `time_major` is `True`, the batch and time dimensions are\\\n                exchanged.\n        \"\"\"\n        enc_outputs, _, enc_output_size = self._encoder(\n            inputs=inputs,\n            sequence_length=sequence_length,\n            initial_state=initial_state,\n        
    time_major=time_major,\n            mode=mode,\n            return_output_size=True,\n            **kwargs)\n\n        # Flatten enc_outputs\n        enc_outputs_flat = nest.flatten(enc_outputs)\n        enc_output_size_flat = nest.flatten(enc_output_size)\n        enc_output_dims_flat = [np.prod(xs) for xs in enc_output_size_flat]\n        enc_outputs_flat = [shapes.flatten(x, 2, xs) for x, xs\n                            in zip(enc_outputs_flat, enc_output_dims_flat)]\n        if len(enc_outputs_flat) == 1:\n            enc_outputs_flat = enc_outputs_flat[0]\n        else:\n            enc_outputs_flat = tf.concat(enc_outputs_flat, axis=2)\n\n        # Compute logits\n        stra = self._hparams.clas_strategy\n        if stra == 'time_wise':\n            logits = enc_outputs_flat\n        elif stra == 'final_time':\n            if time_major:\n                logits = enc_outputs_flat[-1, :, :]\n            else:\n                logits = enc_outputs_flat[:, -1, :]\n        elif stra == 'all_time':\n            if self._logit_layer is None:\n                raise ValueError(\n                    'logit layer must not be `None` if '\n                    'clas_strategy=\"all_time\". 
Specify the logit layer by '\n                    'either passing the layer in the constructor or '\n                    'specifying the hparams.')\n            if self._hparams.max_seq_length is None:\n                raise ValueError(\n                    'hparams.max_seq_length must not be `None` if '\n                    'clas_strategy=\"all_time\"')\n        else:\n            raise ValueError('Unknown classification strategy: {}'.format(stra))\n\n        if self._logit_layer is not None:\n            logit_input_dim = np.sum(enc_output_dims_flat)\n            if stra == 'time_wise':\n                logits, _ = _forward_single_output_layer(\n                    logits, logit_input_dim, self._logit_layer)\n            elif stra == 'final_time':\n                logits = self._logit_layer(logits)\n            elif stra == 'all_time':\n                # Pad `enc_outputs_flat` to have max_seq_length before flatten\n                length_diff = self._hparams.max_seq_length - tf.shape(inputs)[1]\n                length_diff = tf.reshape(length_diff, [1, 1])\n                # Set `paddings = [[0, 0], [0, length_diff], [0, 0]]`\n                paddings = tf.pad(length_diff, paddings=[[1, 1], [1, 0]])\n                logit_input = tf.pad(enc_outputs_flat, paddings=paddings)\n\n                logit_input_dim *= self._hparams.max_seq_length\n                logit_input = tf.reshape(logit_input, [-1, logit_input_dim])\n\n                logits = self._logit_layer(logit_input)\n\n        # Compute predictions\n        num_classes = self._hparams.num_classes\n        is_binary = num_classes == 1\n        is_binary = is_binary or (num_classes <= 0 and logits.shape[-1] == 1)\n\n        if stra == 'time_wise':\n            if is_binary:\n                pred = tf.squeeze(tf.greater(logits, 0), -1)\n                logits = tf.squeeze(logits, -1)\n            else:\n                pred = tf.argmax(logits, axis=-1)\n        else:\n            if is_binary:\n               
 pred = tf.greater(logits, 0)\n                logits = tf.reshape(logits, [-1])\n            else:\n                pred = tf.argmax(logits, axis=-1)\n            pred = tf.reshape(pred, [-1])\n        pred = tf.to_int64(pred)\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            # Add trainable variables of `self._logit_layer`\n            # which may be constructed externally.\n            if self._logit_layer:\n                self._add_trainable_variable(\n                    self._logit_layer.trainable_variables)\n            self._built = True\n\n        return logits, pred\n\n    @property\n    def num_classes(self):\n        \"\"\"The number of classes, specified in :attr:`hparams`.\n        \"\"\"\n        return self._hparams.num_classes\n"
  },
  {
    "path": "texar_repo/texar/modules/classifiers/rnn_classifiers_test.py",
    "content": "#\n\"\"\"\nUnit tests for RNN classifiers.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.modules.classifiers.rnn_classifiers import \\\n        UnidirectionalRNNClassifier\n\n# pylint: disable=too-many-locals, no-member\n\nclass UnidirectionalRNNClassifierTest(tf.test.TestCase):\n    \"\"\"Tests :class:`~texar.modules.UnidirectionalRNNClassifierTest` class.\n    \"\"\"\n\n    def test_trainable_variables(self):\n        \"\"\"Tests the functionality of automatically collecting trainable\n        variables.\n        \"\"\"\n        inputs = tf.placeholder(dtype=tf.float32, shape=[None, None, 100])\n\n        # case 1\n        clas = UnidirectionalRNNClassifier()\n        _, _ = clas(inputs)\n        self.assertEqual(len(clas.trainable_variables), 2+2)\n\n        # case 2\n        hparams = {\n            \"output_layer\": {\"num_layers\": 2},\n            \"logit_layer_kwargs\": {\"use_bias\": False}\n        }\n        clas = UnidirectionalRNNClassifier(hparams=hparams)\n        _, _ = clas(inputs)\n        self.assertEqual(len(clas.trainable_variables), 2+2+2+1)\n        _, _ = clas(inputs)\n        self.assertEqual(len(clas.trainable_variables), 2+2+2+1)\n\n    def test_encode(self):\n        \"\"\"Tests encoding.\n        \"\"\"\n        max_time = 8\n        batch_size = 16\n        emb_dim = 100\n        inputs = tf.random_uniform([batch_size, max_time, emb_dim],\n                                   maxval=1., dtype=tf.float32)\n\n        # case 1\n        clas = UnidirectionalRNNClassifier()\n        logits, pred = clas(inputs)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            logits_, pred_ = sess.run([logits, pred])\n            self.assertEqual(logits_.shape, (batch_size, clas.num_classes))\n          
  self.assertEqual(pred_.shape, (batch_size, ))\n\n        # case 2\n        hparams = {\n            \"num_classes\": 10,\n            \"clas_strategy\": \"time_wise\"\n        }\n        clas = UnidirectionalRNNClassifier(hparams=hparams)\n        logits, pred = clas(inputs)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            logits_, pred_ = sess.run([logits, pred])\n            self.assertEqual(logits_.shape,\n                             (batch_size, max_time, clas.num_classes))\n            self.assertEqual(pred_.shape, (batch_size, max_time))\n\n        # case 3\n        hparams = {\n            \"output_layer\": {\n                \"num_layers\": 1,\n                \"layer_size\": 10\n            },\n            \"num_classes\": 0,\n            \"clas_strategy\": \"time_wise\"\n        }\n        clas = UnidirectionalRNNClassifier(hparams=hparams)\n        logits, pred = clas(inputs)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            logits_, pred_ = sess.run([logits, pred])\n            self.assertEqual(logits_.shape,\n                             (batch_size, max_time, 10))\n            self.assertEqual(pred_.shape, (batch_size, max_time))\n\n\n        # case 4\n        hparams = {\n            \"num_classes\": 10,\n            \"clas_strategy\": \"all_time\",\n            \"max_seq_length\": max_time\n        }\n        inputs = tf.placeholder(tf.float32, shape=[batch_size, 6, emb_dim])\n        clas = UnidirectionalRNNClassifier(hparams=hparams)\n        logits, pred = clas(inputs)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            logits_, pred_ = sess.run(\n                [logits, pred],\n                feed_dict={inputs: np.random.randn(batch_size, 6, emb_dim)})\n            self.assertEqual(logits_.shape, (batch_size, clas.num_classes))\n            
self.assertEqual(pred_.shape, (batch_size, ))\n\n    def test_binary(self):\n        \"\"\"Tests binary classification.\n        \"\"\"\n        max_time = 8\n        batch_size = 16\n        emb_dim = 100\n        inputs = tf.random_uniform([batch_size, max_time, emb_dim],\n                                   maxval=1., dtype=tf.float32)\n\n        # case 1 omittd\n\n        # case 2\n        hparams = {\n            \"num_classes\": 1,\n            \"clas_strategy\": \"time_wise\"\n        }\n        clas = UnidirectionalRNNClassifier(hparams=hparams)\n        logits, pred = clas(inputs)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            logits_, pred_ = sess.run([logits, pred])\n            self.assertEqual(logits_.shape, (batch_size, max_time))\n            self.assertEqual(pred_.shape, (batch_size, max_time))\n\n        # case 3\n        hparams = {\n            \"output_layer\": {\n                \"num_layers\": 1,\n                \"layer_size\": 10\n            },\n            \"num_classes\": 1,\n            \"clas_strategy\": \"time_wise\"\n        }\n        clas = UnidirectionalRNNClassifier(hparams=hparams)\n        logits, pred = clas(inputs)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            logits_, pred_ = sess.run([logits, pred])\n            self.assertEqual(logits_.shape, (batch_size, max_time))\n            self.assertEqual(pred_.shape, (batch_size, max_time))\n\n\n        # case 4\n        hparams = {\n            \"num_classes\": 1,\n            \"clas_strategy\": \"all_time\",\n            \"max_seq_length\": max_time\n        }\n        inputs = tf.placeholder(tf.float32, shape=[batch_size, 6, emb_dim])\n        clas = UnidirectionalRNNClassifier(hparams=hparams)\n        logits, pred = clas(inputs)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            
logits_, pred_ = sess.run(\n                [logits, pred],\n                feed_dict={inputs: np.random.randn(batch_size, 6, emb_dim)})\n            self.assertEqual(logits_.shape, (batch_size, ))\n            self.assertEqual(pred_.shape, (batch_size, ))\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/connectors/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library connectors.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.modules.connectors.connector_base import *\nfrom texar.modules.connectors.connectors import *\n\n"
  },
  {
    "path": "texar_repo/texar/modules/connectors/connector_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase class for connectors that transform inputs into specified output shape.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom texar.module_base import ModuleBase\n\n__all__ = [\n    \"ConnectorBase\"\n]\n\nclass ConnectorBase(ModuleBase):\n    \"\"\"Base class inherited by all connector classes. A connector is to\n    transform inputs into outputs with any specified structure and shape.\n    For example, transforming the final state of an encoder to the initial\n    state of a decoder, and performing stochastic sampling in between as\n    in Variational Autoencoders (VAEs).\n\n    Args:\n        output_size: Size of output **excluding** the batch dimension. For\n            example, set `output_size` to `dim` to generate output of\n            shape `[batch_size, dim]`.\n            Can be an `int`, a tuple of `int`, a TensorShape, or a tuple of\n            TensorShapes.\n            For example, to transform inputs to have decoder state size, set\n            `output_size=decoder.state_size`.\n        hparams (dict, optional): Hyperparameters. Missing\n            hyperparameters will be set to default values. 
See\n            :meth:`default_hparams` for the hyperparameter structure and\n            default values.\n    \"\"\"\n\n    def __init__(self, output_size, hparams=None):\n        ModuleBase.__init__(self, hparams)\n        self._output_size = output_size\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n        \"\"\"\n        return {\n            \"name\": \"connector\"\n        }\n\n    def _build(self, *args, **kwargs):\n        \"\"\"Transforms inputs to outputs with specified shape.\n        \"\"\"\n        raise NotImplementedError\n\n    @property\n    def output_size(self):\n        \"\"\"The output size.\n        \"\"\"\n        return self._output_size\n"
  },
  {
    "path": "texar_repo/texar/modules/connectors/connectors.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious connectors.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\n\nimport tensorflow as tf\nfrom tensorflow import distributions as tf_dstr\nfrom tensorflow.python.util import nest    # pylint: disable=E0611\n\nfrom texar.modules.connectors.connector_base import ConnectorBase\nfrom texar.core import layers\nfrom texar.utils.utils import get_function, check_or_get_instance\n\n# pylint: disable=too-many-locals, arguments-differ\n# pylint: disable=too-many-arguments, invalid-name, no-member\n\n__all__ = [\n    \"ConstantConnector\",\n    \"ForwardConnector\",\n    \"MLPTransformConnector\",\n    \"ReparameterizedStochasticConnector\",\n    \"StochasticConnector\",\n    #\"ConcatConnector\"\n]\n\ndef _assert_same_size(outputs, output_size):\n    \"\"\"Check if outputs match output_size\n\n    Args:\n        outputs: A Tensor or a (nested) tuple of tensors\n        output_size: Can be an Integer, a TensorShape, or a (nested) tuple of\n            Integers or TensorShape.\n    \"\"\"\n    nest.assert_same_structure(outputs, output_size)\n    flat_output_size = nest.flatten(output_size)\n    flat_output = nest.flatten(outputs)\n\n    for (output, size) in zip(flat_output, flat_output_size):\n        if output[0].shape != 
tf.TensorShape(size):\n            raise ValueError(\n                \"The output size does not match the the required output_size\")\n\ndef _get_tensor_depth(x):\n    \"\"\"Returns the size of a tensor excluding the first dimension\n    (typically the batch dimension).\n\n    Args:\n        x: A tensor.\n    \"\"\"\n    return np.prod(x.get_shape().as_list()[1:])\n\ndef _mlp_transform(inputs, output_size, activation_fn=tf.identity):\n    \"\"\"Transforms inputs through a fully-connected layer that creates the output\n    with specified size.\n\n    Args:\n        inputs: A Tensor of shape `[batch_size, ...]` (i.e., batch-major), or a\n            (nested) tuple of such elements. A Tensor or a (nested) tuple of\n            Tensors with shape `[max_time, batch_size, ...]` (i.e., time-major)\n            can be transposed to batch-major using\n            :func:`~texar.utils.transpose_batch_time` prior to this\n            function.\n        output_size: Can be an Integer, a TensorShape, or a (nested) tuple of\n            Integers or TensorShape.\n        activation_fn: Activation function applied to the output.\n\n    Returns:\n        If :attr:`output_size` is an Integer or a TensorShape, returns a Tensor\n        of shape `[batch_size x output_size]`. 
If :attr:`output_size` is a tuple\n        of Integers or TensorShape, returns a tuple having the same structure as\n        :attr:`output_size`, where each element Tensor has the same size as\n        defined in :attr:`output_size`.\n    \"\"\"\n    # Flatten inputs\n    flat_input = nest.flatten(inputs)\n    dims = [_get_tensor_depth(x) for x in flat_input]\n    flat_input = [tf.reshape(x, ([-1, d])) for x, d in zip(flat_input, dims)]\n    concat_input = tf.concat(flat_input, 1)\n\n    # Get output dimension\n    flat_output_size = nest.flatten(output_size)\n    if isinstance(flat_output_size[0], tf.TensorShape):\n        size_list = [0] * len(flat_output_size)\n        for (i, shape) in enumerate(flat_output_size):\n            size_list[i] = np.prod([dim.value for dim in shape])\n    else:\n        size_list = flat_output_size\n    sum_output_size = sum(size_list)\n\n    #fc_output = tf.contrib.layers.fully_connected(\n    #    concat_input, sum_output_size, activation_fn=activation_fn)\n    fc_output = tf.layers.dense(\n        concat_input, sum_output_size, activation=activation_fn)\n\n    flat_output = tf.split(fc_output, size_list, axis=1)\n\n    if isinstance(flat_output_size[0], tf.TensorShape):\n        for (i, shape) in enumerate(flat_output_size):\n            flat_output[i] = tf.reshape(flat_output[i], [-1] + shape.as_list())\n    output = nest.pack_sequence_as(structure=output_size,\n                                   flat_sequence=flat_output)\n\n    return output\n\n\nclass ConstantConnector(ConnectorBase):\n    \"\"\"Creates a constant Tensor or (nested) tuple of Tensors that\n    contains a constant value.\n\n    Args:\n        output_size: Size of output **excluding** the batch dimension. 
For\n            example, set `output_size` to `dim` to generate output of\n            shape `[batch_size, dim]`.\n            Can be an `int`, a tuple of `int`, a Tensorshape, or a tuple of\n            TensorShapes.\n            For example, to transform inputs to have decoder state size, set\n            `output_size=decoder.state_size`.\n        hparams (dict, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    This connector does not have trainable parameters.\n    See :meth:`_build` for the inputs and outputs of the connector.\n\n    Example:\n\n        .. code-block:: python\n\n            connector = Connector(cell.state_size)\n            zero_state = connector(batch_size=64, value=0.)\n            one_state = connector(batch_size=64, value=1.)\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n    def __init__(self, output_size, hparams=None):\n        ConnectorBase.__init__(self, output_size, hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"value\": 0.,\n                \"name\": \"constant_connector\"\n            }\n\n        Here:\n\n        \"value\" : float\n            The constant scalar that the output tensor(s) has. 
Ignored if\n            `value` is given to :meth:`_build`.\n\n        \"name\" : str\n            Name of the connector.\n        \"\"\"\n        return {\n            \"value\": 0.,\n            \"name\": \"constant_connector\"\n        }\n\n    def _build(self, batch_size, value=None):\n        \"\"\"Creates output tensor(s) that has the given value.\n\n        Args:\n            batch_size: An `int` or `int` scalar Tensor, the batch size.\n            value (optional): A scalar, the value that\n                the output tensor(s) has. If `None`, \"value\" in :attr:`hparams`\n                is used.\n\n        Returns:\n            A (structure of) tensor whose structure is the same as\n            :attr:`output_size`, with value speicified by\n            `value` or :attr:`hparams`.\n        \"\"\"\n        value_ = value\n        if value_ is None:\n            value_ = self.hparams.value\n        output = nest.map_structure(\n            lambda x: tf.constant(value_, shape=[batch_size, x]),\n            self._output_size)\n\n        self._built = True\n\n        return output\n\n\nclass ForwardConnector(ConnectorBase):\n    \"\"\"Transforms inputs to have specified structure.\n\n    Args:\n        output_size: Size of output **excluding** the batch dimension. For\n            example, set `output_size` to `dim` to generate output of\n            shape `[batch_size, dim]`.\n            Can be an `int`, a tuple of `int`, a Tensorshape, or a tuple of\n            TensorShapes.\n            For example, to transform inputs to have decoder state size, set\n            `output_size=decoder.state_size`.\n        hparams (dict, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. 
See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    This connector does not have trainable parameters.\n    See :meth:`_build` for the inputs and outputs of the connector.\n\n    The input to the connector must have the same structure with\n    :attr:`output_size`, or must have the same number of elements and be\n    re-packable into the structure of :attr:`output_size`. Note that if input\n    is or contains a `dict` instance, the keys will be sorted to pack in\n    deterministic order (See\n    :tf_main:`pack_sequence_as <contrib/framework/nest/pack_sequence_as>`\n    for more details).\n\n    Example:\n\n        .. code-block:: python\n\n            cell = LSTMCell(num_units=256)\n            # cell.state_size == LSTMStateTuple(c=256, h=256)\n\n            connector = ForwardConnector(cell.state_size)\n            output = connector([tensor_1, tensor_2])\n            # output == LSTMStateTuple(c=tensor_1, h=tensor_2)\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n\n    def __init__(self, output_size, hparams=None):\n        ConnectorBase.__init__(self, output_size, hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"name\": \"forward_connector\"\n            }\n\n        Here:\n\n        \"name\" : str\n            Name of the connector.\n        \"\"\"\n        return {\n            \"name\": \"forward_connector\"\n        }\n\n    def _build(self, inputs):\n        \"\"\"Transforms inputs to have the same structure as with\n        :attr:`output_size`. 
Values of the inputs are not changed.\n\n        :attr:`inputs` must either have the same structure, or have the same\n        number of elements with :attr:`output_size`.\n\n        Args:\n            inputs: The input (structure of) tensor to pass forward.\n\n        Returns:\n            A (structure of) tensors that re-packs `inputs` to have\n            the specified structure of `output_size`.\n        \"\"\"\n        output = inputs\n        try:\n            nest.assert_same_structure(inputs, self._output_size)\n        except (ValueError, TypeError):\n            flat_input = nest.flatten(inputs)\n            output = nest.pack_sequence_as(\n                self._output_size, flat_input)\n\n        self._built = True\n\n        return output\n\n\nclass MLPTransformConnector(ConnectorBase):\n    \"\"\"Transforms inputs with an MLP layer and packs the results into the\n    specified structure and size.\n\n    Args:\n        output_size: Size of output **excluding** the batch dimension. For\n            example, set `output_size` to `dim` to generate output of\n            shape `[batch_size, dim]`.\n            Can be an `int`, a tuple of `int`, a Tensorshape, or a tuple of\n            TensorShapes.\n            For example, to transform inputs to have decoder state size, set\n            `output_size=decoder.state_size`.\n        hparams (dict, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    See :meth:`_build` for the inputs and outputs of the connector.\n\n    The input to the connector can have arbitrary structure and size.\n\n    Example:\n\n        .. 
code-block:: python\n\n            cell = LSTMCell(num_units=256)\n            # cell.state_size == LSTMStateTuple(c=256, h=256)\n\n            connector = MLPTransformConnector(cell.state_size)\n            inputs = tf.zeros([64, 10])\n            output = connector(inputs)\n            # output == LSTMStateTuple(c=tensor_of_shape_(64, 256),\n            #                          h=tensor_of_shape_(64, 256))\n\n        .. code-block:: python\n\n            ## Use to connect encoder and decoder with different state size\n            encoder = UnidirectionalRNNEncoder(...)\n            _, final_state = encoder(inputs=...)\n\n            decoder = BasicRNNDecoder(...)\n            connector = MLPTransformConnector(decoder.state_size)\n\n            _ = decoder(\n                initial_state=connector(final_state),\n                ...)\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n\n    def __init__(self, output_size, hparams=None):\n        ConnectorBase.__init__(self, output_size, hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"activation_fn\": \"identity\",\n                \"name\": \"mlp_connector\"\n            }\n\n        Here:\n\n        \"activation_fn\" : str or callable\n            The activation function applied to the outputs of the MLP\n            transformation layer. 
Can\n            be a function, or its name or module path.\n\n        \"name\" : str\n            Name of the connector.\n        \"\"\"\n        return {\n            \"activation_fn\": \"identity\",\n            \"name\": \"mlp_connector\"\n        }\n\n    def _build(self, inputs):\n        \"\"\"Transforms inputs with an MLP layer and packs the results to have\n        the same structure as specified by :attr:`output_size`.\n\n        Args:\n            inputs: Input (structure of) tensors to be transformed. Must be a\n                Tensor of shape `[batch_size, ...]` or a (nested) tuple of\n                such Tensors. That is, the first dimension of (each) tensor\n                must be the batch dimension.\n\n        Returns:\n            A Tensor or a (nested) tuple of Tensors of the same structure of\n            `output_size`.\n        \"\"\"\n        activation_fn = layers.get_activation_fn(self.hparams.activation_fn)\n\n        output = _mlp_transform(inputs, self._output_size, activation_fn)\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            self._built = True\n\n        return output\n\n\nclass ReparameterizedStochasticConnector(ConnectorBase):\n    \"\"\"Samples from a distribution with reparameterization trick, and\n    transforms samples into specified size.\n\n    Reparameterization allows gradients to be back-propagated through the\n    stochastic samples. Used in, e.g., Variational Autoencoders (VAEs).\n\n    Args:\n        output_size: Size of output **excluding** the batch dimension. For\n            example, set `output_size` to `dim` to generate output of\n            shape `[batch_size, dim]`.\n            Can be an `int`, a tuple of `int`, a Tensorshape, or a tuple of\n            TensorShapes.\n            For example, to transform inputs to have decoder state size, set\n            `output_size=decoder.state_size`.\n        hparams (dict, optional): Hyperparameters. 
Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    Example:\n\n        .. code-block:: python\n\n            cell = LSTMCell(num_units=256)\n            # cell.state_size == LSTMStateTuple(c=256, h=256)\n\n            connector = ReparameterizedStochasticConnector(cell.state_size)\n\n            kwargs = {\n                'loc': tf.zeros([batch_size, 10]),\n                'scale_diag': tf.ones([batch_size, 10])\n            }\n            output, sample = connector(distribution_kwargs=kwargs)\n            # output == LSTMStateTuple(c=tensor_of_shape_(batch_size, 256),\n            #                          h=tensor_of_shape_(batch_size, 256))\n            # sample == Tensor([batch_size, 10])\n\n\n            kwargs = {\n                'loc': tf.zeros([10]),\n                'scale_diag': tf.ones([10])\n            }\n            output_, sample_ = connector(distribution_kwargs=kwargs,\n                                         num_samples=batch_size_)\n            # output_ == LSTMStateTuple(c=tensor_of_shape_(batch_size_, 256),\n            #                           h=tensor_of_shape_(batch_size_, 256))\n            # sample == Tensor([batch_size_, 10])\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n\n    def __init__(self, output_size, hparams=None):\n        ConnectorBase.__init__(self, output_size, hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"activation_fn\": \"identity\",\n                \"name\": \"reparameterized_stochastic_connector\"\n            }\n\n        Here:\n\n        \"activation_fn\" : str\n            The activation function applied to the outputs of the MLP\n            transformation layer. 
Can\n            be a function, or its name or module path.\n\n        \"name\" : str\n            Name of the connector.\n        \"\"\"\n        return {\n            \"activation_fn\": \"tensorflow.identity\",\n            \"name\": \"reparameterized_stochastic_connector\"\n        }\n\n    def _build(self,\n               distribution='MultivariateNormalDiag',\n               distribution_kwargs=None,\n               transform=True,\n               num_samples=None):\n        \"\"\"Samples from a distribution and optionally performs transformation\n        with an MLP layer.\n\n        The distribution must be reparameterizable, i.e.,\n        `distribution.reparameterization_type = FULLY_REPARAMETERIZED`.\n\n        Args:\n            distribution: A instance of subclass of\n                :tf_main:`TF Distribution <distributions/Distribution>`,\n                or :tf_hmpg:`tensorflow_probability Distribution <probability>`,\n                Can be a class, its name or module path, or a class instance.\n            distribution_kwargs (dict, optional): Keyword arguments for the\n                distribution constructor. Ignored if `distribution` is a\n                class instance.\n            transform (bool): Whether to perform MLP transformation of the\n                distribution samples. If `False`, the structure/shape of a\n                sample must match :attr:`output_size`.\n            num_samples (optional): An `int` or `int` Tensor. Number of samples\n                to generate. If not given, generate a single sample. Note\n                that if batch size has already been included in\n                `distribution`'s dimensionality, `num_samples` should be\n                left as `None`.\n\n        Returns:\n            A tuple (output, sample), where\n\n            - output: A Tensor or a (nested) tuple of Tensors with the same \\\n            structure and size of :attr:`output_size`. 
The batch dimension \\\n            equals :attr:`num_samples` if specified, or is determined by the \\\n            distribution dimensionality.\n            - sample: The sample from the distribution, prior to transformation.\n\n        Raises:\n            ValueError: If distribution cannot be reparametrized.\n            ValueError: The output does not match :attr:`output_size`.\n        \"\"\"\n        dstr = check_or_get_instance(\n            distribution, distribution_kwargs,\n            [\"tensorflow.distributions\", \"tensorflow_probability.distributions\",\n             \"texar.custom\"])\n\n        if dstr.reparameterization_type == tf_dstr.NOT_REPARAMETERIZED:\n            raise ValueError(\n                \"Distribution is not reparameterized: %s\" % dstr.name)\n\n        if num_samples:\n            sample = dstr.sample(num_samples)\n        else:\n            sample = dstr.sample()\n\n        #if dstr.event_shape == []:\n        #    sample = tf.reshape(\n        #        sample,\n        #        sample.shape.concatenate(tf.TensorShape(1)))\n\n        # sample = tf.cast(sample, tf.float32)\n        if transform:\n            fn_modules = ['tensorflow', 'tensorflow.nn', 'texar.custom']\n            activation_fn = get_function(self.hparams.activation_fn, fn_modules)\n            output = _mlp_transform(sample, self._output_size, activation_fn)\n\n        _assert_same_size(output, self._output_size)\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            self._built = True\n\n        return output, sample\n\n\nclass StochasticConnector(ConnectorBase):\n    \"\"\"Samples from a distribution and transforms samples into specified size.\n\n    The connector is the same as\n    :class:`~texar.modules.ReparameterizedStochasticConnector`, except that\n    here reparameterization is disabled, and thus the gradients cannot be\n    back-propagated through the stochastic samples.\n\n    Args:\n        output_size: 
Size of output **excluding** the batch dimension. For\n            example, set `output_size` to `dim` to generate output of\n            shape `[batch_size, dim]`.\n            Can be an `int`, a tuple of `int`, a Tensorshape, or a tuple of\n            TensorShapes.\n            For example, to transform inputs to have decoder state size, set\n            `output_size=decoder.state_size`.\n        hparams (dict, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n\n    def __init__(self, output_size, hparams=None):\n        ConnectorBase.__init__(self, output_size, hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"activation_fn\": \"identity\",\n                \"name\": \"stochastic_connector\"\n            }\n\n        Here:\n\n        \"activation_fn\" : str\n            The activation function applied to the outputs of the MLP\n            transformation layer. 
Can\n            be a function, or its name or module path.\n\n        \"name\" : str\n            Name of the connector.\n        \"\"\"\n        return {\n            \"activation_fn\": \"tensorflow.identity\",\n            \"name\": \"stochastic_connector\"\n        }\n\n    def _build(self,\n               distribution='MultivariateNormalDiag',\n               distribution_kwargs=None,\n               transform=False,\n               num_samples=None):\n        \"\"\"Samples from a distribution and optionally performs transformation\n        with an MLP layer.\n\n        The inputs and outputs are the same as\n        :class:`~texar.modules.ReparameterizedStochasticConnector` except that\n        the distribution does not need to be reparameterizable, and gradient\n        cannot be back-propagate through the samples.\n\n        Args:\n            distribution: A instance of subclass of\n                :tf_main:`TF Distribution <distributions/Distribution>`,\n                or :tf_hmpg:`tensorflow_probability Distribution <probability>`.\n                Can be a class, its name or module path, or a class instance.\n            distribution_kwargs (dict, optional): Keyword arguments for the\n                distribution constructor. Ignored if `distribution` is a\n                class instance.\n            transform (bool): Whether to perform MLP transformation of the\n                distribution samples. If `False`, the structure/shape of a\n                sample must match :attr:`output_size`.\n            num_samples (optional): An `int` or `int` Tensor. Number of samples\n                to generate. If not given, generate a single sample. 
Note\n                that if batch size has already been included in\n                `distribution`'s dimensionality, `num_samples` should be\n                left as `None`.\n\n        Returns:\n            A tuple (output, sample), where\n\n            - output: A Tensor or a (nested) tuple of Tensors with the same \\\n            structure and size of :attr:`output_size`. The batch dimension \\\n            equals :attr:`num_samples` if specified, or is determined by the \\\n            distribution dimensionality.\n            - sample: The sample from the distribution, prior to transformation.\n\n        Raises:\n            ValueError: The output does not match :attr:`output_size`.\n        \"\"\"\n        dstr = check_or_get_instance(\n            distribution, distribution_kwargs,\n            [\"tensorflow.distributions\", \"tensorflow_probability.distributions\",\n             \"tensorflow.contrib.distributions\", \"texar.custom\"])\n\n        if num_samples:\n            output = dstr.sample(num_samples)\n        else:\n            output = dstr.sample()\n\n        if dstr.event_shape == []:\n            output = tf.reshape(output,\n                                output.shape.concatenate(tf.TensorShape(1)))\n\n        # Disable gradients through samples\n        output = tf.stop_gradient(output)\n\n        output = tf.cast(output, tf.float32)\n\n        if transform:\n            fn_modules = ['tensorflow', 'tensorflow.nn', 'texar.custom']\n            activation_fn = get_function(self.hparams.activation_fn, fn_modules)\n            output = _mlp_transform(output, self._output_size, activation_fn)\n\n        _assert_same_size(output, self._output_size)\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            self._built = True\n\n        return output\n\n\n#class ConcatConnector(ConnectorBase):\n#    \"\"\"Concatenates multiple connectors into one connector. 
Used in, e.g.,\n#    semi-supervised variational autoencoders, disentangled representation\n#    learning, and other models.\n#\n#    Args:\n#        output_size: Size of output excluding the batch dimension (eg.\n#            :attr:`output_size = p` if :attr:`output.shape` is :attr:`[N, p]`).\n#            Can be an int, a tuple of int, a Tensorshape, or a tuple of\n#            TensorShapes.\n#            For example, to transform to decoder state size, set\n#            `output_size=decoder.cell.state_size`.\n#        hparams (dict): Hyperparameters of the connector.\n#    \"\"\"\n#\n#    def __init__(self, output_size, hparams=None):\n#        ConnectorBase.__init__(self, output_size, hparams)\n#\n#    @staticmethod\n#    def default_hparams():\n#        \"\"\"Returns a dictionary of hyperparameters with default values.\n#\n#        Returns:\n#            .. code-block:: python\n#\n#                {\n#                    \"activation_fn\": \"tensorflow.identity\",\n#                    \"name\": \"concat_connector\"\n#                }\n#\n#            Here:\n#\n#            \"activation_fn\" : (str or callable)\n#                The name or full path to the activation function applied to\n#                the outputs of the MLP layer. The activation functions can be:\n#\n#                - Built-in activation functions defined in :mod:`tf` or \\\n#                  :mod:`tf.nn`, e.g., :tf_main:`identity <identity>`.\n#                - User-defined activation functions in `texar.custom`.\n#                - External activation functions. 
Must provide the full path, \\\n#                  e.g., \"my_module.my_activation_fn\".\n#\n#                The default value is :attr:`\"identity\"`, i.e., the MLP\n#                transformation is linear.\n#\n#            \"name\" : str\n#                Name of the connector.\n#\n#                The default value is \"concat_connector\".\n#        \"\"\"\n#        return {\n#            \"activation_fn\": \"tensorflow.identity\",\n#            \"name\": \"concat_connector\"\n#        }\n#\n#    def _build(self, connector_inputs, transform=True):\n#        \"\"\"Concatenate multiple input connectors\n#\n#        Args:\n#            connector_inputs: a list of connector states\n#            transform (bool): If `True`, then the output are automatically\n#                transformed to match :attr:`output_size`.\n#\n#        Returns:\n#            A Tensor or a (nested) tuple of Tensors of the same structure of\n#            the decoder state.\n#        \"\"\"\n#        connector_inputs = [tf.cast(connector, tf.float32)\n#                            for connector in connector_inputs]\n#        output = tf.concat(connector_inputs, axis=1)\n#\n#        if transform:\n#            fn_modules = ['texar.custom', 'tensorflow', 'tensorflow.nn']\n#            activation_fn = get_function(self.hparams.activation_fn,\n#                                         fn_modules)\n#            output = _mlp_transform(output, self._output_size, activation_fn)\n#        _assert_same_size(output, self._output_size)\n#\n#        self._add_internal_trainable_variables()\n#        self._built = True\n#\n#        return output\n"
  },
  {
    "path": "texar_repo/texar/modules/connectors/connectors_test.py",
    "content": "#\n\"\"\"\nUnit tests for connectors.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\nfrom tensorflow_probability import distributions as tfpd\nfrom tensorflow.python.util import nest    # pylint: disable=E0611\n\nfrom texar.core import layers\nfrom texar.modules import ConstantConnector\nfrom texar.modules import MLPTransformConnector\nfrom texar.modules import ReparameterizedStochasticConnector\nfrom texar.modules.connectors.connectors import _assert_same_size\n\n# pylint: disable=too-many-locals, invalid-name\n\nclass TestConnectors(tf.test.TestCase):\n    \"\"\"Tests various connectors.\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n        self._batch_size = 100\n\n        self._decoder_cell = layers.get_rnn_cell(\n            layers.default_rnn_cell_hparams())\n\n    def test_constant_connector(self):\n        \"\"\"Tests the logic of\n        :class:`~texar.modules.connectors.ConstantConnector`.\n        \"\"\"\n        connector = ConstantConnector(self._decoder_cell.state_size)\n\n        decoder_initial_state_0 = connector(self._batch_size)\n        decoder_initial_state_1 = connector(self._batch_size, value=1.)\n        nest.assert_same_structure(decoder_initial_state_0,\n                                   self._decoder_cell.state_size)\n        nest.assert_same_structure(decoder_initial_state_1,\n                                   self._decoder_cell.state_size)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            s_0, s_1 = sess.run(\n                [decoder_initial_state_0, decoder_initial_state_1])\n            self.assertEqual(nest.flatten(s_0)[0][0, 0], 0.)\n            self.assertEqual(nest.flatten(s_1)[0][0, 0], 1.)\n\n    def test_forward_connector(self):\n        \"\"\"Tests the logic of\n        
:class:`~texar.modules.connectors.ForwardConnector`.\n        \"\"\"\n        # TODO(zhiting)\n        pass\n\n    def test_mlp_transform_connector(self):\n        \"\"\"Tests the logic of\n        :class:`~texar.modules.connectors.MLPTransformConnector`.\n        \"\"\"\n        connector = MLPTransformConnector(self._decoder_cell.state_size)\n        output = connector(tf.zeros([5, 10]))\n        nest.assert_same_structure(output, self._decoder_cell.state_size)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n\n            output_ = sess.run(output)\n            nest.assert_same_structure(output_, self._decoder_cell.state_size)\n\n    def test_reparameterized_stochastic_connector(self):\n        \"\"\"Tests the logic of\n        :class:`~texar.modules.ReparameterizedStochasticConnector`.\n        \"\"\"\n        state_size = (10, 10)\n        variable_size = 100\n        state_size_ts = (tf.TensorShape([10, 10]), tf.TensorShape([2, 3, 4]))\n        sample_num = 10\n\n        mu = tf.zeros([self._batch_size, variable_size])\n        var = tf.ones([self._batch_size, variable_size])\n        mu_vec = tf.zeros([variable_size])\n        var_vec = tf.ones([variable_size])\n        gauss_ds = tfpd.MultivariateNormalDiag(loc=mu, scale_diag=var)\n        gauss_ds_vec = tfpd.MultivariateNormalDiag(loc=mu_vec,\n                                                   scale_diag=var_vec)\n        gauss_connector = ReparameterizedStochasticConnector(state_size)\n        gauss_connector_ts = ReparameterizedStochasticConnector(state_size_ts)\n\n        output_1, _ = gauss_connector(gauss_ds)\n        output_2, _ = gauss_connector(\n            distribution=\"MultivariateNormalDiag\",\n            distribution_kwargs={\"loc\": mu, \"scale_diag\": var})\n        sample_ts, _ = gauss_connector_ts(gauss_ds)\n\n        # specify sample num\n        sample_test_num, _ = gauss_connector(\n            gauss_ds_vec, 
num_samples=sample_num)\n\n        # test when :attr:`transform` is False\n        #sample_test_no_transform = gauss_connector(gauss_ds, transform=False)\n\n        test_list = [output_1, output_2, sample_ts, sample_test_num]\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            out_list = sess.run(test_list)\n            out1 = out_list[0]\n            out2 = out_list[1]\n            out_ts = out_list[2]\n            out_test_num = out_list[3]\n\n            # check the same size\n            self.assertEqual(out_test_num[0].shape,\n                             tf.TensorShape([sample_num, state_size[0]]))\n            self.assertEqual(out1[0].shape,\n                             tf.TensorShape([self._batch_size, state_size[0]]))\n            self.assertEqual(out2[0].shape,\n                             tf.TensorShape([self._batch_size, state_size[0]]))\n            _assert_same_size(out_ts, state_size_ts)\n\n            # sample_mu = np.mean(sample_outputs, axis=0)\n            # # pylint: disable=no-member\n            # sample_var = np.var(sample_outputs, axis=0)\n\n            ## check if the value is approximated N(0, 1)\n            # for i in range(variable_size):\n               # self.assertAlmostEqual(0, sample_mu[i], delta=0.2)\n               # self.assertAlmostEqual(1, sample_var[i], delta=0.2)\n\n    #def test_concat_connector(self): # pylint: disable=too-many-locals\n    #    \"\"\"Tests the logic of\n    #    :class:`~texar.modules.connectors.ConcatConnector`.\n    #    \"\"\"\n    #    gauss_size = 5\n    #    constant_size = 7\n    #    variable_size = 13\n\n    #    decoder_size1 = 16\n    #    decoder_size2 = (16, 32)\n\n    #    gauss_connector = StochasticConnector(gauss_size)\n    #    categorical_connector = StochasticConnector(1)\n    #    constant_connector = ConstantConnector(constant_size)\n    #    concat_connector1 = ConcatConnector(decoder_size1)\n    #    concat_connector2 
= ConcatConnector(decoder_size2)\n\n    #    # pylint: disable=invalid-name\n    #    mu = tf.zeros([self._batch_size, gauss_size])\n    #    var = tf.ones([self._batch_size, gauss_size])\n    #    categorical_prob = tf.constant(\n    #       [[0.1, 0.2, 0.7] for _ in xrange(self._batch_size)])\n    #    categorical_ds = tfds.Categorical(probs = categorical_prob)\n    #    gauss_ds = tfds.MultivariateNormalDiag(loc = mu, scale_diag = var)\n\n    #    gauss_state = gauss_connector(gauss_ds)\n    #    categorical_state = categorical_connector(categorical_ds)\n    #    constant_state = constant_connector(self._batch_size, value=1.)\n    #    with tf.Session() as debug_sess:\n    #        debug_cater = debug_sess.run(categorical_state)\n\n    #    state1 = concat_connector1(\n    #       [gauss_state, categorical_state, constant_state])\n    #    state2 = concat_connector2(\n    #       [gauss_state, categorical_state, constant_state])\n\n    #    with self.test_session() as sess:\n    #        sess.run(tf.global_variables_initializer())\n    #        [output1, output2] = sess.run([state1, state2])\n\n    #        # check the same size\n    #        self.assertEqual(output1.shape[1], decoder_size1)\n    #        self.assertEqual(output2[1].shape[1], decoder_size2[1])\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/decoders/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library decoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.modules.decoders.rnn_decoder_base import *\nfrom texar.modules.decoders.rnn_decoders import *\nfrom texar.modules.decoders.rnn_decoder_helpers import *\nfrom texar.modules.decoders.transformer_decoders import *\nfrom texar.modules.decoders.beam_search_decode import *\n"
  },
  {
    "path": "texar_repo/texar/modules/decoders/beam_search_decode.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBeam search decoding for RNN decoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nfrom tensorflow.contrib.seq2seq import \\\n    dynamic_decode, AttentionWrapperState, AttentionWrapper, \\\n    BeamSearchDecoder, tile_batch\n\nfrom texar.modules.decoders.rnn_decoder_base import RNNDecoderBase\n\n# pylint: disable=too-many-arguments, protected-access, too-many-locals\n# pylint: disable=invalid-name\n\n__all__ = [\n    \"beam_search_decode\"\n]\n\ndef _get_initial_state(initial_state,\n                       tiled_initial_state,\n                       cell,\n                       batch_size,\n                       beam_width,\n                       dtype):\n    if tiled_initial_state is None:\n        if isinstance(initial_state, AttentionWrapperState):\n            raise ValueError(\n                '`initial_state` must not be an AttentionWrapperState. 
Use '\n                'a plain cell state instead, which will be wrapped into an '\n                'AttentionWrapperState automatically.')\n        if initial_state is None:\n            tiled_initial_state = cell.zero_state(batch_size * beam_width,\n                                                  dtype)\n        else:\n            tiled_initial_state = tile_batch(initial_state,\n                                             multiplier=beam_width)\n\n    if isinstance(cell, AttentionWrapper) and \\\n            not isinstance(tiled_initial_state, AttentionWrapperState):\n        zero_state = cell.zero_state(batch_size * beam_width, dtype)\n        tiled_initial_state = zero_state.clone(cell_state=tiled_initial_state)\n\n    return tiled_initial_state\n\ndef beam_search_decode(decoder_or_cell,\n                       embedding,\n                       start_tokens,\n                       end_token,\n                       beam_width,\n                       initial_state=None,\n                       tiled_initial_state=None,\n                       output_layer=None,\n                       length_penalty_weight=0.0,\n                       max_decoding_length=None,\n                       output_time_major=False,\n                       **kwargs):\n    \"\"\"Performs beam search sampling decoding.\n\n    Args:\n        decoder_or_cell: An instance of\n            subclass of :class:`~texar.modules.RNNDecoderBase`,\n            or an instance of :tf_main:`RNNCell <contrib/rnn/RNNCell>`. 
The\n            decoder or RNN cell to perform decoding.\n        embedding: A callable that takes a vector tensor of indexes (e.g.,\n            an instance of subclass of :class:`~texar.modules.EmbedderBase`),\n            or the :attr:`params` argument for\n            :tf_main:`tf.nn.embedding_lookup <nn/embedding_lookup>`.\n        start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.\n        end_token: `int32` scalar, the token that marks end of decoding.\n        beam_width (int): Python integer, the number of beams.\n        initial_state (optional): Initial state of decoding. If `None`\n            (default), zero state is used.\n\n            The state must **not** be tiled with\n            :tf_main:`tile_batch <contrib/seq2seq/tile_batch>`.\n            If you have an already-tiled initial state, use\n            :attr:`tiled_initial_state` instead.\n\n            In the case of attention RNN decoder, `initial_state` must\n            **not** be an :tf_main:`AttentionWrapperState\n            <contrib/seq2seq/AttentionWrapperState>`. Instead, it must be a\n            state of the wrapped `RNNCell`, which state will be wrapped into\n            `AttentionWrapperState` automatically.\n\n            Ignored if :attr:`tiled_initial_state` is given.\n        tiled_initial_state (optional): Initial state that has been tiled\n            (typicaly with :tf_main:`tile_batch <contrib/seq2seq/tile_batch>`)\n            so that the batch dimension has size `batch_size * beam_width`.\n\n            In the case of attention RNN decoder, this can be either a state\n            of the wrapped `RNNCell`, or an `AttentionWrapperState`.\n\n            If not given, :attr:`initial_state` is used.\n        output_layer (optional): A :tf_main:`Layer <layers/Layer>` instance to\n            apply to the RNN output prior to storing the result or sampling. 
If\n            `None` and :attr:`decoder_or_cell` is a decoder, the decoder's\n            output layer will be used.\n        length_penalty_weight: Float weight to penalize length.\n            Disabled with `0.0` (default).\n        max_decoding_length (optional): A int scalar Tensor indicating the\n            maximum allowed number of decoding steps. If `None` (default),\n            decoding will continue until the end token is encountered.\n        output_time_major (bool): If `True`, outputs are returned as\n            time major tensors. If `False` (default), outputs are returned\n            as batch major tensors.\n        **kwargs: Other keyword arguments for :tf_main:`dynamic_decode\n            <contrib/seq2seq/dynamic_decode>` except argument\n            `maximum_iterations` which is set to :attr:`max_decoding_length`.\n\n    Returns:\n        A tuple `(outputs, final_state, sequence_length)`, where\n\n        - outputs: An instance of :tf_main:`FinalBeamSearchDecoderOutput \\\n        <contrib/seq2seq/FinalBeamSearchDecoderOutput>`.\n        - final_state: An instance of :tf_main:`BeamSearchDecoderState \\\n        <contrib/seq2seq/BeamSearchDecoderState>`.\n        - sequence_length: A Tensor of shape `[batch_size]` containing \\\n        the lengths of samples.\n\n    Example:\n\n        .. 
code-block:: python\n\n            ## Beam search with basic RNN decoder\n\n            embedder = WordEmbedder(vocab_size=data.vocab.size)\n            decoder = BasicRNNDecoder(vocab_size=data.vocab.size)\n\n            outputs, _, _, = beam_search_decode(\n                decoder_or_cell=decoder,\n                embedding=embedder,\n                start_tokens=[data.vocab.bos_token_id] * 100,\n                end_token=data.vocab.eos_token_id,\n                beam_width=5,\n                max_decoding_length=60)\n\n            sample_ids = sess.run(outputs.predicted_ids)\n            sample_text = tx.utils.map_ids_to_strs(sample_id[:,:,0], data.vocab)\n            print(sample_text)\n            # [\n            #   the first sequence sample .\n            #   the second sequence sample .\n            #   ...\n            # ]\n\n        .. code-block:: python\n\n            ## Beam search with attention RNN decoder\n\n            # Encodes the source\n            enc_embedder = WordEmbedder(data.source_vocab.size, ...)\n            encoder = UnidirectionalRNNEncoder(...)\n\n            enc_outputs, enc_state = encoder(\n                inputs=enc_embedder(data_batch['source_text_ids']),\n                sequence_length=data_batch['source_length'])\n\n            # Decodes while attending to the source\n            dec_embedder = WordEmbedder(vocab_size=data.target_vocab.size, ...)\n            decoder = AttentionRNNDecoder(\n                memory=enc_outputs,\n                memory_sequence_length=data_batch['source_length'],\n                vocab_size=data.target_vocab.size)\n\n            # Beam search\n            outputs, _, _, = beam_search_decode(\n                decoder_or_cell=decoder,\n                embedding=dec_embedder,\n                start_tokens=[data.vocab.bos_token_id] * 100,\n                end_token=data.vocab.eos_token_id,\n                beam_width=5,\n                initial_state=enc_state,\n                
max_decoding_length=60)\n    \"\"\"\n    if isinstance(decoder_or_cell, RNNDecoderBase):\n        cell = decoder_or_cell._get_beam_search_cell(beam_width=beam_width)\n    elif isinstance(decoder_or_cell, tf.contrib.rnn.RNNCell):\n        cell = decoder_or_cell\n    else:\n        raise ValueError(\"`decoder` must be an instance of a subclass of \"\n                         \"either `RNNDecoderBase` or `RNNCell`.\")\n\n    start_tokens = tf.convert_to_tensor(\n        start_tokens, dtype=tf.int32, name=\"start_tokens\")\n    if start_tokens.get_shape().ndims != 1:\n        raise ValueError(\"`start_tokens` must be a vector\")\n    batch_size = tf.size(start_tokens)\n\n    initial_state = _get_initial_state(\n        initial_state, tiled_initial_state, cell,\n        batch_size, beam_width, tf.float32)\n\n    if output_layer is None and isinstance(decoder_or_cell, RNNDecoderBase):\n        output_layer = decoder_or_cell.output_layer\n\n    def _decode():\n        beam_docoder = BeamSearchDecoder(\n            cell=cell,\n            embedding=embedding,\n            start_tokens=start_tokens,\n            end_token=end_token,\n            initial_state=initial_state,\n            beam_width=beam_width,\n            output_layer=output_layer,\n            length_penalty_weight=length_penalty_weight)\n\n        if 'maximum_iterations' in kwargs:\n            raise ValueError('Use `max_decoding_length` to set the maximum '\n                             'allowed number of decoding steps.')\n        outputs, final_state, _ = dynamic_decode(\n            decoder=beam_docoder,\n            output_time_major=output_time_major,\n            maximum_iterations=max_decoding_length,\n            **kwargs)\n\n        return outputs, final_state, final_state.lengths\n\n    if isinstance(decoder_or_cell, RNNDecoderBase):\n        vs = decoder_or_cell.variable_scope\n        with tf.variable_scope(vs, reuse=tf.AUTO_REUSE):\n            return _decode()\n    else:\n        return 
_decode()\n"
  },
  {
    "path": "texar_repo/texar/modules/decoders/beam_search_decode_test.py",
    "content": "\"\"\"\nUnit tests for beam search decoding.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nimport tensorflow as tf\nfrom tensorflow.contrib.seq2seq import dynamic_decode\nfrom tensorflow.contrib.seq2seq import BeamSearchDecoder, tile_batch\n\nimport texar as tx\nfrom texar.modules.decoders.beam_search_decode import beam_search_decode\nfrom texar import context\n\n# pylint: disable=no-member, too-many-instance-attributes, invalid-name\n# pylint: disable=too-many-locals, too-many-arguments\n\nclass BeamSearchDecodeTest(tf.test.TestCase):\n    \"\"\"Tests\n    :func:`texar.modules.decoders.beam_search_decode.beam_search_decode`.\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n        self._vocab_size = 10\n        self._max_time = 16\n        self._batch_size = 8\n        self._emb_dim = 20\n        self._cell_dim = 256\n        self._attention_dim = self._cell_dim\n        self._beam_width = 11\n        self._inputs = tf.random_uniform(\n            [self._batch_size, self._max_time, self._emb_dim],\n            maxval=1., dtype=tf.float32)\n        self._embedding = tf.random_uniform(\n            [self._vocab_size, self._emb_dim], maxval=1., dtype=tf.float32)\n        self._encoder_output = tf.random_uniform(\n            [self._batch_size, self._max_time, 64])\n\n    def _test_beam_search(\n            self, decoder, initial_state=None, tiled_initial_state=None,\n            tf_initial_state=None, beam_width_1=1, initiated=False):\n        ## Compare with tf built-in BeamSearchDecoder\n        outputs, final_state, _ = beam_search_decode(\n            decoder_or_cell=decoder,\n            embedding=self._embedding,\n            start_tokens=[1]*self._batch_size,\n            end_token=2,\n            beam_width=beam_width_1,\n            max_decoding_length=20)\n\n        
self.assertIsInstance(\n            outputs, tf.contrib.seq2seq.FinalBeamSearchDecoderOutput)\n        self.assertIsInstance(\n            final_state, tf.contrib.seq2seq.BeamSearchDecoderState)\n\n        num_trainable_variables = len(tf.trainable_variables())\n        _ = decoder(\n            decoding_strategy='infer_greedy',\n            embedding=self._embedding,\n            start_tokens=[1]*self._batch_size,\n            end_token=2,\n            max_decoding_length=20)\n        self.assertEqual(num_trainable_variables, len(tf.trainable_variables()))\n\n        if tf_initial_state is None:\n            tf_initial_state = decoder.cell.zero_state(\n                self._batch_size * beam_width_1, tf.float32)\n        beam_decoder = BeamSearchDecoder(\n            cell=decoder.cell,\n            embedding=self._embedding,\n            start_tokens=[1]*self._batch_size,\n            end_token=2,\n            initial_state=tf_initial_state,\n            beam_width=beam_width_1,\n            output_layer=decoder.output_layer)\n\n        outputs_1, final_state_1, _ = dynamic_decode(\n            decoder=beam_decoder, maximum_iterations=20)\n\n        ## Tests time major\n        outputs_2, _, _ = beam_search_decode(\n            decoder_or_cell=decoder,\n            embedding=self._embedding,\n            start_tokens=[1]*self._batch_size,\n            end_token=2,\n            beam_width=self._beam_width,\n            initial_state=initial_state,\n            tiled_initial_state=tiled_initial_state,\n            max_decoding_length=21)\n        outputs_3, _, _ = beam_search_decode(\n            decoder_or_cell=decoder,\n            embedding=self._embedding,\n            start_tokens=[1]*self._batch_size,\n            end_token=2,\n            beam_width=self._beam_width,\n            initial_state=initial_state,\n            tiled_initial_state=tiled_initial_state,\n            max_decoding_length=21,\n            output_time_major=True)\n\n\n        with 
self.test_session() as sess:\n            if not initiated:\n                sess.run(tf.global_variables_initializer())\n\n            outputs_, final_state_, outputs_1_, final_state_1_ = sess.run(\n                [outputs, final_state, outputs_1, final_state_1],\n                feed_dict={context.global_mode():\n                           tf.estimator.ModeKeys.PREDICT})\n\n            np.testing.assert_array_equal(\n                outputs_.predicted_ids, outputs_1_.predicted_ids)\n            np.testing.assert_array_equal(\n                outputs_.beam_search_decoder_output.scores,\n                outputs_1_.beam_search_decoder_output.scores)\n            np.testing.assert_array_equal(\n                outputs_.beam_search_decoder_output.predicted_ids,\n                outputs_1_.beam_search_decoder_output.predicted_ids)\n            np.testing.assert_array_equal(\n                outputs_.beam_search_decoder_output.parent_ids,\n                outputs_1_.beam_search_decoder_output.parent_ids)\n            np.testing.assert_array_equal(\n                final_state_.log_probs, final_state_1_.log_probs)\n            np.testing.assert_array_equal(\n                final_state_.lengths, final_state_1_.lengths)\n\n            outputs_2_, outputs_3_ = sess.run(\n                [outputs_2, outputs_3],\n                feed_dict={context.global_mode():\n                           tf.estimator.ModeKeys.PREDICT})\n            self.assertEqual(outputs_2_.predicted_ids.shape,\n                             tuple([self._batch_size, 21, 11]))\n            self.assertEqual(outputs_3_.predicted_ids.shape,\n                             tuple([21, self._batch_size, 11]))\n\n    def test_basic_rnn_decoder_beam_search(self):\n        \"\"\"Tests beam search with BasicRNNDecoder.\n        \"\"\"\n        hparams = {\n            \"rnn_cell\": {\n                \"kwargs\": {\"num_units\": self._cell_dim}\n            }\n        }\n        decoder = tx.modules.BasicRNNDecoder(\n 
           vocab_size=self._vocab_size,\n            hparams=hparams)\n\n        self._test_beam_search(decoder)\n\n        self._test_beam_search(\n            decoder, beam_width_1=self._beam_width, initiated=True)\n\n    def test_basic_rnn_decoder_given_initial_state(self):\n        \"\"\"Tests beam search with BasicRNNDecoder given initial state.\n        \"\"\"\n        hparams = {\n            \"rnn_cell\": {\n                \"kwargs\": {\"num_units\": self._cell_dim}\n            }\n        }\n        decoder = tx.modules.BasicRNNDecoder(\n            vocab_size=self._vocab_size,\n            hparams=hparams)\n\n        # (zhiting): The beam search decoder does not generate max-length\n        # samples if only one cell_state is created. Perhaps due to\n        # random seed or bugs?\n        cell_state = decoder.cell.zero_state(self._batch_size, tf.float32)\n        cell_state = decoder.cell.zero_state(self._batch_size, tf.float32)\n\n        self._test_beam_search(decoder, initial_state=cell_state)\n\n        tiled_cell_state = tile_batch(cell_state, multiplier=self._beam_width)\n        self._test_beam_search(\n            decoder, tiled_initial_state=tiled_cell_state, initiated=True)\n\n    def test_attention_decoder_beam_search(self):\n        \"\"\"Tests beam search with RNNAttentionDecoder.\n        \"\"\"\n        seq_length = np.random.randint(\n            self._max_time, size=[self._batch_size]) + 1\n        encoder_values_length = tf.constant(seq_length)\n        hparams = {\n            \"attention\": {\n                \"kwargs\": {\"num_units\": self._attention_dim}\n            },\n            \"rnn_cell\": {\n                \"kwargs\": {\"num_units\": self._cell_dim}\n            }\n        }\n        decoder = tx.modules.AttentionRNNDecoder(\n            vocab_size=self._vocab_size,\n            memory=self._encoder_output,\n            memory_sequence_length=encoder_values_length,\n            hparams=hparams)\n\n        
self._test_beam_search(decoder)\n\n    def test_attention_decoder_given_initial_state(self):\n        \"\"\"Tests beam search with RNNAttentionDecoder given initial state.\n        \"\"\"\n        seq_length = np.random.randint(\n            self._max_time, size=[self._batch_size]) + 1\n        encoder_values_length = tf.constant(seq_length)\n        hparams = {\n            \"attention\": {\n                \"kwargs\": {\"num_units\": self._attention_dim}\n            },\n            \"rnn_cell\": {\n                \"kwargs\": {\"num_units\": self._cell_dim}\n            }\n        }\n        decoder = tx.modules.AttentionRNNDecoder(\n            vocab_size=self._vocab_size,\n            memory=self._encoder_output,\n            memory_sequence_length=encoder_values_length,\n            hparams=hparams)\n\n        state = decoder.cell.zero_state(self._batch_size, tf.float32)\n\n        cell_state = state.cell_state\n        self._test_beam_search(decoder, initial_state=cell_state)\n\n        tiled_cell_state = tile_batch(cell_state, multiplier=self._beam_width)\n        self._test_beam_search(\n            decoder, tiled_initial_state=tiled_cell_state, initiated=True)\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/decoders/rnn_decoder_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase class for RNN decoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=too-many-arguments, no-name-in-module\n# pylint: disable=too-many-branches, protected-access, too-many-locals\n# pylint: disable=arguments-differ, unused-argument\n\nimport copy\n\nimport tensorflow as tf\nfrom tensorflow.contrib.seq2seq import Decoder as TFDecoder\nfrom tensorflow.contrib.seq2seq import dynamic_decode\nfrom tensorflow.python.framework import tensor_shape\nfrom tensorflow.python.util import nest\n\nfrom texar.core import layers\nfrom texar.utils import utils\nfrom texar.utils.mode import is_train_mode, is_train_mode_py\nfrom texar.module_base import ModuleBase\nfrom texar.modules.decoders import rnn_decoder_helpers\n\n__all__ = [\n    \"RNNDecoderBase\"\n]\n\nclass RNNDecoderBase(ModuleBase, TFDecoder):\n    \"\"\"Base class inherited by all RNN decoder classes.\n    See :class:`~texar.modules.BasicRNNDecoder` for the argumenrts.\n\n    See :meth:`_build` for the inputs and outputs of RNN decoders in general.\n\n    .. document private functions\n    .. 
automethod:: _build\n    \"\"\"\n\n    def __init__(self,\n                 cell=None,\n                 vocab_size=None,\n                 output_layer=None,\n                 cell_dropout_mode=None,\n                 hparams=None):\n        ModuleBase.__init__(self, hparams)\n\n        self._helper = None\n        self._initial_state = None\n\n        # Make rnn cell\n        with tf.variable_scope(self.variable_scope):\n            if cell is not None:\n                self._cell = cell\n            else:\n                self._cell = layers.get_rnn_cell(\n                    self._hparams.rnn_cell, cell_dropout_mode)\n        self._beam_search_cell = None\n\n        # Make the output layer\n        self._vocab_size = vocab_size\n        self._output_layer = output_layer\n        if output_layer is None:\n            if self._vocab_size is None:\n                raise ValueError(\n                    \"Either `output_layer` or `vocab_size` must be provided. \"\n                    \"Set `output_layer=tf.identity` if no output layer is \"\n                    \"wanted.\")\n            with tf.variable_scope(self.variable_scope):\n                self._output_layer = tf.layers.Dense(units=self._vocab_size)\n        elif output_layer is not tf.identity:\n            if not isinstance(output_layer, tf.layers.Layer):\n                raise ValueError(\n                    \"`output_layer` must be either `tf.identity` or \"\n                    \"an instance of `tf.layers.Layer`.\")\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        The hyperparameters are the same as in\n        :meth:`~texar.modules.BasicRNNDecoder.default_hparams` of\n        :class:`~texar.modules.BasicRNNDecoder`, except that the default\n        \"name\" here is \"rnn_decoder\".\n        \"\"\"\n        return {\n            \"rnn_cell\": layers.default_rnn_cell_hparams(),\n            \"helper_train\": 
rnn_decoder_helpers.default_helper_train_hparams(),\n            \"helper_infer\": rnn_decoder_helpers.default_helper_infer_hparams(),\n            \"max_decoding_length_train\": None,\n            \"max_decoding_length_infer\": None,\n            \"name\": \"rnn_decoder\"\n        }\n\n    def _build(self,\n               decoding_strategy=\"train_greedy\",\n               initial_state=None,\n               inputs=None,\n               sequence_length=None,\n               embedding=None,\n               start_tokens=None,\n               end_token=None,\n               softmax_temperature=None,\n               max_decoding_length=None,\n               impute_finished=False,\n               output_time_major=False,\n               input_time_major=False,\n               helper=None,\n               mode=None,\n               **kwargs):\n        \"\"\"Performs decoding. This is a shared interface for both\n        :class:`~texar.modules.BasicRNNDecoder` and\n        :class:`~texar.modules.AttentionRNNDecoder`.\n\n        The function provides **3 ways** to specify the\n        decoding method, with varying flexibility:\n\n        1. The :attr:`decoding_strategy` argument: A string taking value of:\n\n            - **\"train_greedy\"**: decoding in teacher-forcing fashion \\\n              (i.e., feeding \\\n              `ground truth` to decode the next step), and each sample is \\\n              obtained by taking the `argmax` of the RNN output logits. 
\\\n              Arguments :attr:`(inputs, sequence_length, input_time_major)` \\\n              are required for this strategy, and argument :attr:`embedding` \\\n              is optional.\n            - **\"infer_greedy\"**: decoding in inference fashion (i.e., feeding \\\n              the `generated` sample to decode the next step), and each sample\\\n              is obtained by taking the `argmax` of the RNN output logits.\\\n              Arguments :attr:`(embedding, start_tokens, end_token)` are \\\n              required for this strategy, and argument \\\n              :attr:`max_decoding_length` is optional.\n            - **\"infer_sample\"**: decoding in inference fashion, and each\n              sample is obtained by `random sampling` from the RNN output\n              distribution. Arguments \\\n              :attr:`(embedding, start_tokens, end_token)` are \\\n              required for this strategy, and argument \\\n              :attr:`max_decoding_length` is optional.\n\n          This argument is used only when argument :attr:`helper` is `None`.\n\n          Example:\n\n            .. code-block:: python\n\n                embedder = WordEmbedder(vocab_size=data.vocab.size)\n                decoder = BasicRNNDecoder(vocab_size=data.vocab.size)\n\n                # Teacher-forcing decoding\n                outputs_1, _, _ = decoder(\n                    decoding_strategy='train_greedy',\n                    inputs=embedder(data_batch['text_ids']),\n                    sequence_length=data_batch['length']-1)\n\n                # Random sample decoding. Gets 100 sequence samples\n                outputs_2, _, sequence_length = decoder(\n                    decoding_strategy='infer_sample',\n                    start_tokens=[data.vocab.bos_token_id]*100,\n                    end_token=data.vocab.eos.token_id,\n                    embedding=embedder,\n                    max_decoding_length=60)\n\n        2. 
The :attr:`helper` argument: An instance of subclass of \\\n           :tf_main:`tf.contrib.seq2seq.Helper <contrib/seq2seq/Helper>`. This \\\n           provides a superset of decoding strategies than above, for example:\n\n            - :tf_main:`TrainingHelper\n              <contrib/seq2seq/TrainingHelper>` corresponding to the \\\n              \"train_greedy\" strategy.\n            - :tf_main:`ScheduledEmbeddingTrainingHelper\n              <contrib/seq2seq/ScheduledEmbeddingTrainingHelper>` and \\\n              :tf_main:`ScheduledOutputTrainingHelper\n              <contrib/seq2seq/ScheduledOutputTrainingHelper>` for scheduled \\\n              sampling.\n            - :class:`~texar.modules.SoftmaxEmbeddingHelper` and \\\n              :class:`~texar.modules.GumbelSoftmaxEmbeddingHelper` for \\\n              soft decoding and gradient backpropagation.\n\n          This means gives the maximal flexibility of configuring the decoding\\\n          strategy.\n\n          Example:\n\n            .. code-block:: python\n\n                embedder = WordEmbedder(vocab_size=data.vocab.size)\n                decoder = BasicRNNDecoder(vocab_size=data.vocab.size)\n\n                # Teacher-forcing decoding, same as above with\n                # `decoding_strategy='train_greedy'`\n                helper_1 = tf.contrib.seq2seq.TrainingHelper(\n                    inputs=embedders(data_batch['text_ids']),\n                    sequence_length=data_batch['length']-1)\n                outputs_1, _, _ = decoder(helper=helper_1)\n\n                # Gumbel-softmax decoding\n                helper_2 = GumbelSoftmaxEmbeddingHelper(\n                    embedding=embedder,\n                    start_tokens=[data.vocab.bos_token_id]*100,\n                    end_token=data.vocab.eos_token_id,\n                    tau=0.1)\n                outputs_2, _, sequence_length = decoder(\n                    max_decoding_length=60, helper=helper_2)\n\n        3. 
:attr:`hparams[\"helper_train\"]` and :attr:`hparams[\"helper_infer\"]`:\\\n           Specifying the helper through hyperparameters. Train and infer \\\n           strategy is toggled based on :attr:`mode`. Appriopriate arguments \\\n           (e.g., :attr:`inputs`, :attr:`start_tokens`, etc) are selected to \\\n           construct the helper. Additional arguments for helper constructor \\\n           can be provided either through :attr:`**kwargs`, or through \\\n           :attr:`hparams[\"helper_train/infer\"][\"kwargs\"]`.\n\n           This means is used only when both :attr:`decoding_strategy` and \\\n           :attr:`helper` are `None`.\n\n           Example:\n\n             .. code-block:: python\n\n                h = {\n                    \"helper_infer\": {\n                        \"type\": \"GumbelSoftmaxEmbeddingHelper\",\n                        \"kwargs\": { \"tau\": 0.1 }\n                    }\n                }\n                embedder = WordEmbedder(vocab_size=data.vocab.size)\n                decoder = BasicRNNDecoder(vocab_size=data.vocab.size, hparams=h)\n\n                # Gumbel-softmax decoding\n                output, _, _ = decoder(\n                    decoding_strategy=None, # Sets to None explicit\n                    embedding=embedder,\n                    start_tokens=[data.vocab.bos_token_id]*100,\n                    end_token=data.vocab.eos_token_id,\n                    max_decoding_length=60,\n                    mode=tf.estimator.ModeKeys.PREDICT)\n                        # PREDICT mode also shuts down dropout\n\n        Args:\n            decoding_strategy (str): A string specifying the decoding\n                strategy. 
Different arguments are required based on the\n                strategy.\n                Ignored if :attr:`helper` is given.\n            initial_state (optional): Initial state of decoding.\n                If `None` (default), zero state is used.\n\n            inputs (optional): Input tensors for teacher forcing decoding.\n                Used when `decoding_strategy` is set to \"train_greedy\", or\n                when `hparams`-configured helper is used.\n\n                - If :attr:`embedding` is `None`, `inputs` is directly \\\n                fed to the decoder. E.g., in `\"train_greedy\"` strategy, \\\n                `inputs` must be a 3D Tensor of shape \\\n                `[batch_size, max_time, emb_dim]` (or \\\n                `[max_time, batch_size, emb_dim]` if `input_time_major`==True).\n                - If `embedding` is given, `inputs` is used as index \\\n                to look up embeddings and feed in the decoder. \\\n                E.g., if `embedding` is an instance of \\\n                :class:`~texar.modules.WordEmbedder`, \\\n                then :attr:`inputs` is usually a 2D int Tensor \\\n                `[batch_size, max_time]` (or \\\n                `[max_time, batch_size]` if `input_time_major`==True) \\\n                containing the token indexes.\n            sequence_length (optional): A 1D int Tensor containing the\n                sequence length of :attr:`inputs`.\n                Used when `decoding_strategy=\"train_greedy\"` or\n                `hparams`-configured helper is used.\n            embedding (optional): A callable that returns embedding vectors\n                of `inputs` (e.g., an instance of subclass of\n                :class:`~texar.modules.EmbedderBase`), or the `params`\n                argument of\n                :tf_main:`tf.nn.embedding_lookup <nn/embedding_lookup>`.\n                If provided, `inputs` (if used) will be passed to\n                `embedding` to fetch the embedding vectors 
of the inputs.\n                Required when `decoding_strategy=\"infer_greedy\"`\n                or `\"infer_sample\"`; optional when\n                `decoding_strategy=\"train_greedy\"`.\n            start_tokens (optional): A int Tensor of shape `[batch_size]`,\n                the start tokens.\n                Used when `decoding_strategy=\"infer_greedy\"` or\n                `\"infer_sample\"`, or when `hparams`-configured\n                helper is used.\n                Companying with Texar data module, to get `batch_size` samples\n                where batch_size is changing according to the data module,\n                this can be set as\n                `start_tokens=tf.ones_like(batch['length'])*bos_token_id`.\n            end_token (optional): A int 0D Tensor, the token that marks end\n                of decoding.\n                Used when `decoding_strategy=\"infer_greedy\"` or\n                `\"infer_sample\"`, or when `hparams`-configured\n                helper is used.\n            softmax_temperature (optional): A float 0D Tensor, value to divide\n                the logits by before computing the softmax. Larger values\n                (above 1.0) result in more random samples. Must > 0. If `None`,\n                1.0 is used.\n                Used when `decoding_strategy=\"infer_sample\"`.\n            max_decoding_length: A int scalar Tensor indicating the maximum\n                allowed number of decoding steps. If `None` (default), either\n                `hparams[\"max_decoding_length_train\"]` or\n                `hparams[\"max_decoding_length_infer\"]` is used\n                according to :attr:`mode`.\n            impute_finished (bool): If `True`, then states for batch\n                entries which are marked as finished get copied through and\n                the corresponding outputs get zeroed out.  
This causes some\n                slowdown at each time step, but ensures that the final state\n                and outputs have the correct values and that backprop ignores\n                time steps that were marked as finished.\n            output_time_major (bool): If `True`, outputs are returned as\n                time major tensors. If `False` (default), outputs are returned\n                as batch major tensors.\n            input_time_major (optional): Whether the :attr:`inputs` tensor is\n                time major.\n                Used when `decoding_strategy=\"train_greedy\"` or\n                `hparams`-configured helper is used.\n            helper (optional): An instance of\n                :tf_main:`Helper <contrib/seq2seq/Helper>`\n                that defines the decoding strategy. If given,\n                `decoding_strategy`\n                and helper configs in :attr:`hparams` are ignored.\n            mode (str, optional): A string taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`. 
If\n                `TRAIN`, training related hyperparameters are used (e.g.,\n                `hparams['max_decoding_length_train']`), otherwise,\n                inference related hyperparameters are used (e.g.,\n                `hparams['max_decoding_length_infer']`).\n                If `None` (default), `TRAIN` mode is used.\n            **kwargs: Other keyword arguments for constructing helpers\n                defined by `hparams[\"helper_trainn\"]` or\n                `hparams[\"helper_infer\"]`.\n\n        Returns:\n            `(outputs, final_state, sequence_lengths)`, where\n\n            - **`outputs`**: an object containing the decoder output on all \\\n            time steps.\n            - **`final_state`**: is the cell state of the final time step.\n            - **`sequence_lengths`**: is an int Tensor of shape `[batch_size]` \\\n            containing the length of each sample.\n        \"\"\"\n        # Helper\n        if helper is not None:\n            pass\n        elif decoding_strategy is not None:\n            if decoding_strategy == \"train_greedy\":\n                helper = rnn_decoder_helpers._get_training_helper(\n                    inputs, sequence_length, embedding, input_time_major)\n            elif decoding_strategy == \"infer_greedy\":\n                helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(\n                    embedding, start_tokens, end_token)\n            elif decoding_strategy == \"infer_sample\":\n                helper = tf.contrib.seq2seq.SampleEmbeddingHelper(\n                    embedding, start_tokens, end_token, softmax_temperature)\n            else:\n                raise ValueError(\n                    \"Unknown decoding strategy: {}\".format(decoding_strategy))\n        else:\n            if is_train_mode_py(mode):\n                kwargs_ = copy.copy(self._hparams.helper_train.kwargs.todict())\n                helper_type = self._hparams.helper_train.type\n            else:\n                
kwargs_ = copy.copy(self._hparams.helper_infer.kwargs.todict())\n                helper_type = self._hparams.helper_infer.type\n            kwargs_.update({\n                \"inputs\": inputs,\n                \"sequence_length\": sequence_length,\n                \"time_major\": input_time_major,\n                \"embedding\": embedding,\n                \"start_tokens\": start_tokens,\n                \"end_token\": end_token,\n                \"softmax_temperature\": softmax_temperature})\n            kwargs_.update(kwargs)\n            helper = rnn_decoder_helpers.get_helper(helper_type, **kwargs_)\n        self._helper = helper\n\n        # Initial state\n        if initial_state is not None:\n            self._initial_state = initial_state\n        else:\n            self._initial_state = self.zero_state(\n                batch_size=self.batch_size, dtype=tf.float32)\n\n        # Maximum decoding length\n        max_l = max_decoding_length\n        if max_l is None:\n            max_l_train = self._hparams.max_decoding_length_train\n            if max_l_train is None:\n                max_l_train = utils.MAX_SEQ_LENGTH\n            max_l_infer = self._hparams.max_decoding_length_infer\n            if max_l_infer is None:\n                max_l_infer = utils.MAX_SEQ_LENGTH\n            max_l = tf.cond(is_train_mode(mode),\n                            lambda: max_l_train, lambda: max_l_infer)\n\n        # Decode\n        outputs, final_state, sequence_lengths = dynamic_decode(\n            decoder=self, impute_finished=impute_finished,\n            maximum_iterations=max_l, output_time_major=output_time_major)\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            # Add trainable variables of `self._cell` which may be\n            # constructed externally.\n            self._add_trainable_variable(\n                layers.get_rnn_cell_trainable_variables(self._cell))\n            if isinstance(self._output_layer, 
tf.layers.Layer):\n                self._add_trainable_variable(\n                    self._output_layer.trainable_variables)\n            # Add trainable variables of `self._beam_search_rnn_cell` which\n            # may already be constructed and used.\n            if self._beam_search_cell is not None:\n                self._add_trainable_variable(\n                    self._beam_search_cell.trainable_variables)\n\n            self._built = True\n\n        return outputs, final_state, sequence_lengths\n\n    def _get_beam_search_cell(self, **kwargs):\n        self._beam_search_cell = self._cell\n        return self._cell\n\n    def _rnn_output_size(self):\n        size = self._cell.output_size\n        if self._output_layer is tf.identity:\n            return size\n        else:\n            # To use layer's compute_output_shape, we need to convert the\n            # RNNCell's output_size entries into shapes with an unknown\n            # batch size.  We then pass this through the layer's\n            # compute_output_shape and read off all but the first (batch)\n            # dimensions to get the output size of the rnn with the layer\n            # applied to the top.\n            output_shape_with_unknown_batch = nest.map_structure(\n                lambda s: tensor_shape.TensorShape([None]).concatenate(s),\n                size)\n            layer_output_shape = self._output_layer.compute_output_shape(\n                output_shape_with_unknown_batch)\n            return nest.map_structure(lambda s: s[1:], layer_output_shape)\n\n    @property\n    def batch_size(self):\n        return self._helper.batch_size\n\n    @property\n    def output_size(self):\n        \"\"\"Output size of one step.\n        \"\"\"\n        raise NotImplementedError\n\n    @property\n    def output_dtype(self):\n        \"\"\"Types of output of one step.\n        \"\"\"\n        raise NotImplementedError\n\n    def initialize(self, name=None):\n        # Inherits from TFDecoder\n    
    # All RNN decoder classes must implement this\n        raise NotImplementedError\n\n    def step(self, time, inputs, state, name=None):\n        # Inherits from TFDecoder\n        # All RNN decoder classes must implement this\n        raise NotImplementedError\n\n    def finalize(self, outputs, final_state, sequence_lengths):\n        # Inherits from TFDecoder\n        # All RNN decoder classes must implement this\n        raise NotImplementedError\n\n    @property\n    def cell(self):\n        \"\"\"The RNN cell.\n        \"\"\"\n        return self._cell\n\n    def zero_state(self, batch_size, dtype):\n        \"\"\"Zero state of the RNN cell.\n        Equivalent to :attr:`decoder.cell.zero_state`.\n        \"\"\"\n        return self._cell.zero_state(\n            batch_size=batch_size, dtype=dtype)\n\n    @property\n    def state_size(self):\n        \"\"\"The state size of decoder cell.\n        Equivalent to :attr:`decoder.cell.state_size`.\n        \"\"\"\n        return self.cell.state_size\n\n    @property\n    def vocab_size(self):\n        \"\"\"The vocab size.\n        \"\"\"\n        return self._vocab_size\n\n    @property\n    def output_layer(self):\n        \"\"\"The output layer.\n        \"\"\"\n        return self._output_layer\n"
  },
  {
    "path": "texar_repo/texar/modules/decoders/rnn_decoder_helpers.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious helper classes and utilities for RNN decoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nfrom tensorflow.contrib.seq2seq import TrainingHelper as TFTrainingHelper\nfrom tensorflow.contrib.seq2seq import Helper as TFHelper\nfrom tensorflow.contrib.distributions import RelaxedOneHotCategorical \\\n    as GumbelSoftmax\n\nfrom texar.modules.embedders.embedder_base import EmbedderBase\nfrom texar.utils import utils\n\n# pylint: disable=not-context-manager, too-many-arguments\n# pylint: disable=too-many-instance-attributes\n\n__all__ = [\n    \"default_helper_train_hparams\",\n    \"default_helper_infer_hparams\",\n    \"get_helper\",\n    \"_get_training_helper\",\n    \"GumbelSoftmaxEmbeddingHelper\",\n    \"SoftmaxEmbeddingHelper\",\n]\n\ndef default_helper_train_hparams():\n    \"\"\"Returns default hyperparameters of an RNN decoder helper in the training\n    phase.\n\n    See also :meth:`~texar.modules.decoders.rnn_decoder_helpers.get_helper`\n    for information of the hyperparameters.\n\n    Returns:\n        dict: A dictionary with following structure and values:\n\n        .. 
code-block:: python\n\n            {\n                # The `helper_type` argument for `get_helper`, i.e., the name\n                # or full path to the helper class.\n                \"type\": \"TrainingHelper\",\n\n                # The `**kwargs` argument for `get_helper`, i.e., additional\n                # keyword arguments for constructing the helper.\n                \"kwargs\": {}\n            }\n    \"\"\"\n    return {\n        \"type\": \"TrainingHelper\",\n        \"kwargs\": {}\n    }\n\ndef default_helper_infer_hparams():\n    \"\"\"Returns default hyperparameters of an RNN decoder helper in the inference\n    phase.\n\n    See also :meth:`~texar.modules.decoders.rnn_decoder_helpers.get_helper`\n    for information of the hyperparameters.\n\n    Returns:\n        dict: A dictionary with following structure and values:\n\n        .. code-block:: python\n\n            {\n                # The `helper_type` argument for `get_helper`, i.e., the name\n                # or full path to the helper class.\n                \"type\": \"SampleEmbeddingHelper\",\n\n                # The `**kwargs` argument for `get_helper`, i.e., additional\n                # keyword arguments for constructing the helper.\n                \"kwargs\": {}\n            }\n    \"\"\"\n    return {\n        \"type\": \"SampleEmbeddingHelper\",\n        \"kwargs\": {}\n    }\n\n\ndef get_helper(helper_type,\n               inputs=None,\n               sequence_length=None,\n               embedding=None,\n               start_tokens=None,\n               end_token=None,\n               **kwargs):\n    \"\"\"Creates a Helper instance.\n\n    Args:\n        helper_type: A :tf_main:`Helper <contrib/seq2seq/Helper>` class, its\n            name or module path, or a class instance. 
If a class instance\n            is given, it is returned directly.\n        inputs (optional): Inputs to the RNN decoder, e.g., ground truth\n            tokens for teacher forcing decoding.\n        sequence_length (optional): A 1D int Tensor containing the\n            sequence length of :attr:`inputs`.\n        embedding (optional): A callable that takes a vector tensor of\n            indexes (e.g., an instance of subclass of\n            :class:`~texar.modules.EmbedderBase`), or the `params` argument\n            for `embedding_lookup` (e.g., the embedding Tensor).\n        start_tokens (optional): A int Tensor of shape `[batch_size]`,\n            the start tokens.\n        end_token (optional): A int 0D Tensor, the token that marks end\n            of decoding.\n        **kwargs: Additional keyword arguments for constructing the helper.\n\n    Returns:\n        A helper instance.\n    \"\"\"\n    module_paths = [\n        'texar.modules.decoders.rnn_decoder_helpers',\n        'tensorflow.contrib.seq2seq',\n        'texar.custom']\n    class_kwargs = {\"inputs\": inputs,\n                    \"sequence_length\": sequence_length,\n                    \"embedding\": embedding,\n                    \"start_tokens\": start_tokens,\n                    \"end_token\": end_token}\n    class_kwargs.update(kwargs)\n    return utils.check_or_get_instance_with_redundant_kwargs(\n        helper_type, class_kwargs, module_paths)\n\n\ndef _get_training_helper( #pylint: disable=invalid-name\n        inputs, sequence_length, embedding=None, time_major=False, name=None):\n    \"\"\"Returns an instance of :tf_main:`TrainingHelper\n    <contrib/seq2seq/TrainingHelper>` given embeddings.\n\n    Args:\n        inputs: If :attr:`embedding` is given, this is sequences of input\n            token indexes. 
If :attr:`embedding` is `None`, this is passed to\n            TrainingHelper directly.\n        sequence_length (1D Tensor): Lengths of input token sequences.\n        embedding (optional): The `params` argument of\n            :tf_main:`tf.nn.embedding_lookup\n            <nn/embedding_lookup>` (e.g., the embedding Tensor); or a callable\n            that takes a vector of integer indexes and returns respective\n            embedding (e.g., an instance of subclass of\n            :class:`~texar.modules.EmbedderBase`).\n        time_major (bool): Whether the tensors in `inputs` are time major.\n            If `False` (default), they are assumed to be batch major.\n        name (str, optional): Name scope for any created operations.\n\n    Returns:\n        An instance of TrainingHelper.\n\n    Raises:\n        ValueError: if `sequence_length` is not a 1D tensor.\n    \"\"\"\n    if embedding is None:\n        return TFTrainingHelper(inputs=inputs,\n                                sequence_length=sequence_length,\n                                time_major=time_major,\n                                name=name)\n\n    with tf.name_scope(name, \"TrainingHelper\", [embedding, inputs]):\n        if callable(embedding):\n            embedding_fn = embedding\n        else:\n            embedding_fn = (\n                lambda ids: tf.nn.embedding_lookup(embedding, ids))\n        emb_inputs = embedding_fn(inputs)\n    helper = TFTrainingHelper(inputs=emb_inputs,\n                              sequence_length=sequence_length,\n                              time_major=time_major,\n                              name=name)\n    return helper\n\n\nclass SoftmaxEmbeddingHelper(TFHelper):\n    \"\"\"A helper that feeds softmax probabilities over vocabulary\n    to the next step.\n    Uses the softmax probability vector to pass through word embeddings to\n    get the next input (i.e., a mixed word embedding).\n\n    A subclass of\n    :tf_main:`Helper 
<contrib/seq2seq/Helper>`.\n    Used as a helper to :class:`~texar.modules.RNNDecoderBase` :meth:`_build`\n    in inference mode.\n\n    Args:\n        embedding: An embedding argument (:attr:`params`) for\n            :tf_main:`tf.nn.embedding_lookup <nn/embedding_lookup>`, or an\n            instance of subclass of :class:`texar.modules.EmbedderBase`.\n            Note that other callables are not acceptable here.\n        start_tokens: An int tensor shaped `[batch_size]`. The\n            start tokens.\n        end_token: An int scalar tensor. The token that marks end of\n            decoding.\n        tau: A float scalar tensor, the softmax temperature.\n        stop_gradient (bool): Whether to stop the gradient backpropagation\n            when feeding softmax vector to the next step.\n        use_finish (bool): Whether to stop decoding once `end_token` is\n            generated. If `False`, decoding will continue until\n            `max_decoding_length` of the decoder is reached.\n    \"\"\"\n\n    def __init__(self, embedding, start_tokens, end_token, tau,\n                 stop_gradient=False, use_finish=True):\n        if isinstance(embedding, EmbedderBase):\n            embedding = embedding.embedding\n\n        if callable(embedding):\n            raise ValueError(\"`embedding` must be an embedding tensor or an \"\n                             \"instance of subclass of `EmbedderBase`.\")\n        else:\n            self._embedding = embedding\n            self._embedding_fn = (\n                lambda ids: tf.nn.embedding_lookup(embedding, ids))\n\n        self._start_tokens = tf.convert_to_tensor(\n            start_tokens, dtype=tf.int32, name=\"start_tokens\")\n        self._end_token = tf.convert_to_tensor(\n            end_token, dtype=tf.int32, name=\"end_token\")\n        self._start_inputs = self._embedding_fn(self._start_tokens)\n        self._batch_size = tf.size(self._start_tokens)\n        self._tau = tau\n        self._stop_gradient = 
stop_gradient\n        self._use_finish = use_finish\n\n    @property\n    def batch_size(self):\n        return self._batch_size\n\n    @property\n    def sample_ids_dtype(self):\n        return tf.float32\n\n    @property\n    def sample_ids_shape(self):\n        return self._embedding.get_shape()[:1]\n\n    def initialize(self, name=None):\n        finished = tf.tile([False], [self._batch_size])\n        return (finished, self._start_inputs)\n\n    def sample(self, time, outputs, state, name=None):\n        \"\"\"Returns `sample_id` which is softmax distributions over vocabulary\n        with temperature `tau`. Shape = `[batch_size, vocab_size]`\n        \"\"\"\n        sample_ids = tf.nn.softmax(outputs / self._tau)\n        return sample_ids\n\n    def next_inputs(self, time, outputs, state, sample_ids, name=None):\n        if self._use_finish:\n            hard_ids = tf.argmax(sample_ids, axis=-1, output_type=tf.int32)\n            finished = tf.equal(hard_ids, self._end_token)\n        else:\n            finished = tf.tile([False], [self._batch_size])\n        if self._stop_gradient:\n            sample_ids = tf.stop_gradient(sample_ids)\n        next_inputs = tf.matmul(sample_ids, self._embedding)\n        return (finished, next_inputs, state)\n\n\nclass GumbelSoftmaxEmbeddingHelper(SoftmaxEmbeddingHelper):\n    \"\"\"A helper that feeds gumbel softmax sample to the next step.\n    Uses the gumbel softmax vector to pass through word embeddings to\n    get the next input (i.e., a mixed word embedding).\n\n    A subclass of\n    :tf_main:`Helper <contrib/seq2seq/Helper>`.\n    Used as a helper to :class:`~texar.modules.RNNDecoderBase` :meth:`_build`\n    in inference mode.\n\n    Same as :class:`~texar.modules.SoftmaxEmbeddingHelper` except that here\n    gumbel softmax (instead of softmax) is used.\n\n    Args:\n        embedding: An embedding argument (:attr:`params`) for\n            :tf_main:`tf.nn.embedding_lookup <nn/embedding_lookup>`, or an\n          
  instance of subclass of :class:`texar.modules.EmbedderBase`.\n            Note that other callables are not acceptable here.\n        start_tokens: An int tensor shaped `[batch_size]`. The\n            start tokens.\n        end_token: An int scalar tensor. The token that marks end of\n            decoding.\n        tau: A float scalar tensor, the softmax temperature.\n        straight_through (bool): Whether to use straight through gradient\n            between time steps. If `True`, a single token with highest\n            probability (i.e., greedy sample) is fed to the next step and\n            gradient is computed using straight through. If `False` (default),\n            the soft gumbel-softmax distribution is fed to the next step.\n        stop_gradient (bool): Whether to stop the gradient backpropagation\n            when feeding softmax vector to the next step.\n        use_finish (bool): Whether to stop decoding once `end_token` is\n            generated. If `False`, decoding will continue until\n            `max_decoding_length` of the decoder is reached.\n    \"\"\"\n    def __init__(self, embedding, start_tokens, end_token, tau,\n                 straight_through=False, stop_gradient=False, use_finish=True):\n        super(GumbelSoftmaxEmbeddingHelper, self).__init__(\n            embedding, start_tokens, end_token, tau, stop_gradient, use_finish)\n        self._straight_through = straight_through\n\n    def sample(self, time, outputs, state, name=None):\n        \"\"\"Returns `sample_id` of shape `[batch_size, vocab_size]`. If\n        `straight_through` is False, this is gumbel softmax distributions over\n        vocabulary with temperature `tau`. 
If `straight_through` is True,\n        this is one-hot vectors of the greedy samples.\n        \"\"\"\n        sample_ids = tf.nn.softmax(outputs / self._tau)\n        sample_ids = GumbelSoftmax(self._tau, logits=outputs).sample()\n        if self._straight_through:\n            size = tf.shape(sample_ids)[-1]\n            sample_ids_hard = tf.cast(\n                tf.one_hot(tf.argmax(sample_ids, -1), size), sample_ids.dtype)\n            sample_ids = tf.stop_gradient(sample_ids_hard - sample_ids) \\\n                         + sample_ids\n        return sample_ids\n"
  },
  {
    "path": "texar_repo/texar/modules/decoders/rnn_decoders.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious RNN decoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=no-name-in-module, too-many-arguments, too-many-locals\n# pylint: disable=not-context-manager, protected-access, invalid-name\n\nimport collections\nimport copy\n\nimport tensorflow as tf\nfrom tensorflow.contrib.seq2seq import AttentionWrapper\nfrom tensorflow.python.util import nest\nfrom tensorflow.contrib.seq2seq import tile_batch\n\nfrom texar.modules.decoders.rnn_decoder_base import RNNDecoderBase\nfrom texar.utils import utils\n\n__all__ = [\n    \"BasicRNNDecoderOutput\",\n    \"AttentionRNNDecoderOutput\",\n    \"BasicRNNDecoder\",\n    \"AttentionRNNDecoder\"\n]\n\nclass BasicRNNDecoderOutput(\n        collections.namedtuple(\"BasicRNNDecoderOutput\",\n                               (\"logits\", \"sample_id\", \"cell_output\"))):\n    \"\"\"The outputs of basic RNN decoder that include both RNN outputs and\n    sampled ids at each step. This is also used to store results of all the\n    steps after decoding the whole sequence.\n\n    Attributes:\n        logits: The outputs of RNN (at each step/of all steps) by applying the\n            output layer on cell outputs. 
E.g., in\n            :class:`~texar.modules.BasicRNNDecoder` with default\n            hyperparameters, this is a Tensor of\n            shape `[batch_size, max_time, vocab_size]` after decoding the\n            whole sequence.\n        sample_id: The sampled results (at each step/of all steps). E.g., in\n            BasicRNNDecoder with decoding strategy of train_greedy,\n            this is a Tensor\n            of shape `[batch_size, max_time]` containing the sampled token\n            indexes of all steps.\n        cell_output: The output of RNN cell (at each step/of all steps).\n            This is the results prior to the output layer. E.g., in\n            BasicRNNDecoder with default\n            hyperparameters, this is a Tensor of\n            shape `[batch_size, max_time, cell_output_size]` after decoding\n            the whole sequence.\n    \"\"\"\n    pass\n\nclass AttentionRNNDecoderOutput(\n        collections.namedtuple(\n            \"AttentionRNNDecoderOutput\",\n            [\"logits\", \"sample_id\", \"cell_output\",\n             \"attention_scores\", \"attention_context\"])):\n    \"\"\"The outputs of attention RNN decoders that additionally include\n    attention results.\n\n    Attributes:\n        logits: The outputs of RNN (at each step/of all steps) by applying the\n            output layer on cell outputs. E.g., in\n            :class:`~texar.modules.AttentionRNNDecoder`, this is a Tensor of\n            shape `[batch_size, max_time, vocab_size]` after decoding.\n        sample_id: The sampled results (at each step/of all steps). E.g., in\n            :class:`~texar.modules.AttentionRNNDecoder` with decoding strategy\n            of train_greedy, this\n            is a Tensor of shape `[batch_size, max_time]` containing the\n            sampled token indexes of all steps.\n        cell_output: The output of RNN cell (at each step/of all steps).\n            This is the results prior to the output layer. 
E.g., in\n            AttentionRNNDecoder with default\n            hyperparameters, this is a Tensor of\n            shape `[batch_size, max_time, cell_output_size]` after decoding\n            the whole sequence.\n        attention_scores: A single or tuple of `Tensor`(s) containing the\n            alignments emitted (at the previous time step/of all time steps)\n            for each attention mechanism.\n        attention_context: The attention emitted (at the previous time step/of\n            all time steps).\n    \"\"\"\n    pass\n\n\nclass BasicRNNDecoder(RNNDecoderBase):\n    \"\"\"Basic RNN decoder.\n\n    Args:\n        cell (RNNCell, optional): An instance of\n            :tf_main:`RNNCell <ontrib/rnn/RNNCell>`. If `None`\n            (default), a cell is created as specified in\n            :attr:`hparams`.\n        cell_dropout_mode (optional): A Tensor taking value of\n            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, which\n            toggles dropout in the RNN cell (e.g., activates dropout in\n            TRAIN mode). If `None`, :func:`~texar.global_mode` is used.\n            Ignored if :attr:`cell` is given.\n        vocab_size (int, optional): Vocabulary size. Required if\n            :attr:`output_layer` is `None`.\n        output_layer (optional): An instance of\n            :tf_main:`tf.layers.Layer <layers/Layer>`, or\n            :tf_main:`tf.identity <identity>`. Apply to the RNN cell\n            output to get logits. If `None`, a dense layer\n            is used with output dimension set to :attr:`vocab_size`.\n            Set `output_layer=tf.identity` if you do not want to have an\n            output layer after the RNN cell outputs.\n        hparams (dict, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. 
See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    See :meth:`~texar.modules.RNNDecoderBase._build` for the inputs and outputs\n    of the decoder. The decoder returns\n    `(outputs, final_state, sequence_lengths)`, where `outputs` is an instance\n    of :class:`~texar.modules.BasicRNNDecoderOutput`.\n\n    Example:\n\n        .. code-block:: python\n\n            embedder = WordEmbedder(vocab_size=data.vocab.size)\n            decoder = BasicRNNDecoder(vocab_size=data.vocab.size)\n\n            # Training loss\n            outputs, _, _ = decoder(\n                decoding_strategy='train_greedy',\n                inputs=embedder(data_batch['text_ids']),\n                sequence_length=data_batch['length']-1)\n\n            loss = tx.losses.sequence_sparse_softmax_cross_entropy(\n                labels=data_batch['text_ids'][:, 1:],\n                logits=outputs.logits,\n                sequence_length=data_batch['length']-1)\n\n            # Inference sample\n            outputs, _, _ = decoder(\n                decoding_strategy='infer_sample',\n                start_tokens=[data.vocab.bos_token_id]*100,\n                end_token=data.vocab.eos.token_id,\n                embedding=embedder,\n                max_decoding_length=60,\n                mode=tf.estimator.ModeKeys.PREDICT)\n\n            sample_id = sess.run(outputs.sample_id)\n            sample_text = tx.utils.map_ids_to_strs(sample_id, data.vocab)\n            print(sample_text)\n            # [\n            #   the first sequence sample .\n            #   the second sequence sample .\n            #   ...\n            # ]\n    \"\"\"\n\n    def __init__(self,\n                 cell=None,\n                 cell_dropout_mode=None,\n                 vocab_size=None,\n                 output_layer=None,\n                 hparams=None):\n        RNNDecoderBase.__init__(\n            self, cell, vocab_size, output_layer, 
cell_dropout_mode, hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"rnn_cell\": default_rnn_cell_hparams(),\n                \"max_decoding_length_train\": None,\n                \"max_decoding_length_infer\": None,\n                \"helper_train\": {\n                    \"type\": \"TrainingHelper\",\n                    \"kwargs\": {}\n                }\n                \"helper_infer\": {\n                    \"type\": \"SampleEmbeddingHelper\",\n                    \"kwargs\": {}\n                }\n                \"name\": \"basic_rnn_decoder\"\n            }\n\n        Here:\n\n        \"rnn_cell\" : dict\n            A dictionary of RNN cell hyperparameters. Ignored if\n            :attr:`cell` is given to the decoder constructor.\n            The default value is defined in\n            :meth:`~texar.core.layers.default_rnn_cell_hparams`.\n\n        \"max_decoding_length_train\": int or None\n            Maximum allowed number of decoding steps in training mode.\n            If `None` (default), decoding is\n            performed until fully done, e.g., encountering the <EOS> token.\n            Ignored if `max_decoding_length` is given when calling\n            the decoder.\n\n        \"max_decoding_length_infer\" : int or None\n            Same as \"max_decoding_length_train\" but for inference mode.\n\n        \"helper_train\" : dict\n            The hyperparameters of the helper used in training.\n            \"type\" can be a helper class, its name or module path, or a\n            helper instance. If a class name is given, the class must be\n            from module :tf_main:`tf.contrib.seq2seq <contrib/seq2seq>`,\n            :mod:`texar.modules`, or :mod:`texar.custom`. 
This is used\n            only when both `decoding_strategy` and `helper` augments are\n            `None` when calling the decoder. See\n            :meth:`~texar.modules.RNNDecoderBase._build` for more details.\n\n        \"helper_infer\": dict\n            Same as \"helper_train\" but during inference mode.\n\n        \"name\" : str\n            Name of the decoder.\n\n            The default value is \"basic_rnn_decoder\".\n        \"\"\"\n        hparams = RNNDecoderBase.default_hparams()\n        hparams[\"name\"] = \"basic_rnn_decoder\"\n        return hparams\n\n    def initialize(self, name=None):\n        return self._helper.initialize() + (self._initial_state,)\n\n    def step(self, time, inputs, state, name=None):\n        cell_outputs, cell_state = self._cell(inputs, state)\n        logits = self._output_layer(cell_outputs)\n        sample_ids = self._helper.sample(\n            time=time, outputs=logits, state=cell_state)\n        (finished, next_inputs, next_state) = self._helper.next_inputs(\n            time=time,\n            outputs=logits,\n            state=cell_state,\n            sample_ids=sample_ids)\n        outputs = BasicRNNDecoderOutput(logits, sample_ids, cell_outputs)\n        return (outputs, next_state, next_inputs, finished)\n\n    def finalize(self, outputs, final_state, sequence_lengths):\n        return outputs, final_state\n\n    @property\n    def output_size(self):\n        \"\"\"Output size of one step.\n        \"\"\"\n        return BasicRNNDecoderOutput(\n            logits=self._rnn_output_size(),\n            sample_id=self._helper.sample_ids_shape,\n            cell_output=self._cell.output_size)\n\n    @property\n    def output_dtype(self):\n        \"\"\"Types of output of one step.\n        \"\"\"\n        # Assume the dtype of the cell is the output_size structure\n        # containing the input_state's first component's dtype.\n        # Return that structure and the sample_ids_dtype from the helper.\n        
dtype = nest.flatten(self._initial_state)[0].dtype\n        return BasicRNNDecoderOutput(\n            logits=nest.map_structure(lambda _: dtype, self._rnn_output_size()),\n            sample_id=self._helper.sample_ids_dtype,\n            cell_output=nest.map_structure(\n                lambda _: dtype, self._cell.output_size))\n\n\nclass AttentionRNNDecoder(RNNDecoderBase):\n    \"\"\"RNN decoder with attention mechanism.\n\n    Args:\n        memory: The memory to query, e.g., the output of an RNN encoder. This\n            tensor should be shaped `[batch_size, max_time, dim]`.\n        memory_sequence_length (optional): A tensor of shape `[batch_size]`\n            containing the sequence lengths for the batch\n            entries in memory. If provided, the memory tensor rows are masked\n            with zeros for values past the respective sequence lengths.\n        cell (RNNCell, optional): An instance of `RNNCell`. If `None`, a cell\n            is created as specified in :attr:`hparams`.\n        cell_dropout_mode (optional): A Tensor taking value of\n            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, which\n            toggles dropout in the RNN cell (e.g., activates dropout in\n            TRAIN mode). If `None`, :func:`~texar.global_mode` is used.\n            Ignored if :attr:`cell` is given.\n        vocab_size (int, optional): Vocabulary size. Required if\n            :attr:`output_layer` is `None`.\n        output_layer (optional): An instance of\n            :tf_main:`tf.layers.Layer <layers/Layer>`, or\n            :tf_main:`tf.identity <identity>`. Apply to the RNN cell\n            output to get logits. If `None`, a dense layer\n            is used with output dimension set to :attr:`vocab_size`.\n            Set `output_layer=tf.identity` if you do not want to have an\n            output layer after the RNN cell outputs.\n        cell_input_fn (callable, optional): A callable that produces RNN cell\n            inputs. 
If `None` (default), the default is used:\n            `lambda inputs, attention: tf.concat([inputs, attention], -1)`,\n            which cancats regular RNN cell inputs with attentions.\n        hparams (dict, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    See :meth:`~texar.modules.RNNDecoderBase._build` for the inputs and outputs\n    of the decoder. The decoder returns\n    `(outputs, final_state, sequence_lengths)`, where `outputs` is an instance\n    of :class:`~texar.modules.AttentionRNNDecoderOutput`.\n\n    Example:\n\n        .. code-block:: python\n\n            # Encodes the source\n            enc_embedder = WordEmbedder(data.source_vocab.size, ...)\n            encoder = UnidirectionalRNNEncoder(...)\n\n            enc_outputs, _ = encoder(\n                inputs=enc_embedder(data_batch['source_text_ids']),\n                sequence_length=data_batch['source_length'])\n\n            # Decodes while attending to the source\n            dec_embedder = WordEmbedder(vocab_size=data.target_vocab.size, ...)\n            decoder = AttentionRNNDecoder(\n                memory=enc_outputs,\n                memory_sequence_length=data_batch['source_length'],\n                vocab_size=data.target_vocab.size)\n\n            outputs, _, _ = decoder(\n                decoding_strategy='train_greedy',\n                inputs=dec_embedder(data_batch['target_text_ids']),\n                sequence_length=data_batch['target_length']-1)\n    \"\"\"\n    def __init__(self,\n                 memory,\n                 memory_sequence_length=None,\n                 cell=None,\n                 cell_dropout_mode=None,\n                 vocab_size=None,\n                 output_layer=None,\n                 #attention_layer=None, # TODO(zhiting): only valid for tf>=1.0\n                 cell_input_fn=None,\n        
         hparams=None):\n        RNNDecoderBase.__init__(\n            self, cell, vocab_size, output_layer, cell_dropout_mode, hparams)\n\n        attn_hparams = self._hparams['attention']\n        attn_kwargs = attn_hparams['kwargs'].todict()\n\n        # Parse the 'probability_fn' argument\n        if 'probability_fn' in attn_kwargs:\n            prob_fn = attn_kwargs['probability_fn']\n            if prob_fn is not None and not callable(prob_fn):\n                prob_fn = utils.get_function(\n                    prob_fn,\n                    ['tensorflow.nn', 'tensorflow.contrib.sparsemax',\n                     'tensorflow.contrib.seq2seq'])\n            attn_kwargs['probability_fn'] = prob_fn\n\n        attn_kwargs.update({\n            \"memory_sequence_length\": memory_sequence_length,\n            \"memory\": memory})\n        self._attn_kwargs = attn_kwargs\n        attn_modules = ['tensorflow.contrib.seq2seq', 'texar.custom']\n        # Use variable_scope to ensure all trainable variables created in\n        # the attention mechanism are collected\n        with tf.variable_scope(self.variable_scope):\n            attention_mechanism = utils.check_or_get_instance(\n                attn_hparams[\"type\"], attn_kwargs, attn_modules,\n                classtype=tf.contrib.seq2seq.AttentionMechanism)\n\n        self._attn_cell_kwargs = {\n            \"attention_layer_size\": attn_hparams[\"attention_layer_size\"],\n            \"alignment_history\": attn_hparams[\"alignment_history\"],\n            \"output_attention\": attn_hparams[\"output_attention\"],\n        }\n        self._cell_input_fn = cell_input_fn\n        # Use variable_scope to ensure all trainable variables created in\n        # AttentionWrapper are collected\n        with tf.variable_scope(self.variable_scope):\n            #if attention_layer is not None:\n            #    self._attn_cell_kwargs[\"attention_layer_size\"] = None\n            attn_cell = AttentionWrapper(\n                
self._cell,\n                attention_mechanism,\n                cell_input_fn=self._cell_input_fn,\n                #attention_layer=attention_layer,\n                **self._attn_cell_kwargs)\n            self._cell = attn_cell\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values:\n\n        Common hyperparameters are the same as in\n        :class:`~texar.modules.BasicRNNDecoder`.\n        :meth:`~texar.modules.BasicRNNDecoder.default_hparams`.\n        Additional hyperparameters are for attention mechanism\n        configuration.\n\n        .. code-block:: python\n\n            {\n                \"attention\": {\n                    \"type\": \"LuongAttention\",\n                    \"kwargs\": {\n                        \"num_units\": 256,\n                    },\n                    \"attention_layer_size\": None,\n                    \"alignment_history\": False,\n                    \"output_attention\": True,\n                },\n                # The following hyperparameters are the same as with\n                # `BasicRNNDecoder`\n                \"rnn_cell\": default_rnn_cell_hparams(),\n                \"max_decoding_length_train\": None,\n                \"max_decoding_length_infer\": None,\n                \"helper_train\": {\n                    \"type\": \"TrainingHelper\",\n                    \"kwargs\": {}\n                }\n                \"helper_infer\": {\n                    \"type\": \"SampleEmbeddingHelper\",\n                    \"kwargs\": {}\n                }\n                \"name\": \"attention_rnn_decoder\"\n            }\n\n        Here:\n\n        \"attention\" : dict\n            Attention hyperparameters, including:\n\n            \"type\" : str or class or instance\n                The attention type. Can be an attention class, its name or\n                module path, or a class instance. 
The class must be a subclass\n                of :tf_main:`TF AttentionMechanism\n                <contrib/seq2seq/AttentionMechanism>`. If class name is\n                given, the class must be from modules\n                :tf_main:`tf.contrib.seq2seq <contrib/seq2seq>` or\n                :mod:`texar.custom`.\n\n                Example:\n\n                    .. code-block:: python\n\n                        # class name\n                        \"type\": \"LuongAttention\"\n                        \"type\": \"BahdanauAttention\"\n                        # module path\n                        \"type\": \"tf.contrib.seq2seq.BahdanauMonotonicAttention\"\n                        \"type\": \"my_module.MyAttentionMechanismClass\"\n                        # class\n                        \"type\": tf.contrib.seq2seq.LuongMonotonicAttention\n                        # instance\n                        \"type\": LuongAttention(...)\n\n            \"kwargs\" : dict\n                keyword arguments for the attention class constructor.\n                Arguments :attr:`memory` and\n                :attr:`memory_sequence_length` should **not** be\n                specified here because they are given to the decoder\n                constructor. Ignored if \"type\" is an attention class\n                instance. For example\n\n                Example:\n\n                    .. code-block:: python\n\n                        \"type\": \"LuongAttention\",\n                        \"kwargs\": {\n                            \"num_units\": 256,\n                            \"probability_fn\": tf.nn.softmax\n                        }\n\n                    Here \"probability_fn\" can also be set to the string name\n                    or module path to a probability function.\n\n                \"attention_layer_size\" : int or None\n                    The depth of the attention (output) layer. 
The context and\n                    cell output are fed into the attention layer to generate\n                    attention at each time step.\n                    If `None` (default), use the context as attention at each\n                    time step.\n\n                \"alignment_history\": bool\n                    whether to store alignment history from all time steps\n                    in the final output state. (Stored as a time major\n                    `TensorArray` on which you must call `stack()`.)\n\n                \"output_attention\": bool\n                    If `True` (default), the output at each time step is\n                    the attention value. This is the behavior of Luong-style\n                    attention mechanisms. If `False`, the output at each\n                    time step is the output of `cell`.  This is the\n                    beahvior of Bhadanau-style attention mechanisms.\n                    In both cases, the `attention` tensor is propagated to\n                    the next time step via the state and is used there.\n                    This flag only controls whether the attention mechanism\n                    is propagated up to the next cell in an RNN stack or to\n                    the top RNN output.\n        \"\"\"\n        hparams = RNNDecoderBase.default_hparams()\n        hparams[\"name\"] = \"attention_rnn_decoder\"\n        hparams[\"attention\"] = {\n            \"type\": \"LuongAttention\",\n            \"kwargs\": {\n                \"num_units\": 256,\n            },\n            \"attention_layer_size\": None,\n            \"alignment_history\": False,\n            \"output_attention\": True,\n        }\n        return hparams\n\n    # pylint: disable=arguments-differ\n    def _get_beam_search_cell(self, beam_width):\n        \"\"\"Returns the RNN cell for beam search decoding.\n        \"\"\"\n        with tf.variable_scope(self.variable_scope, reuse=True):\n            attn_kwargs = 
copy.copy(self._attn_kwargs)\n\n            memory = attn_kwargs['memory']\n            attn_kwargs['memory'] = tile_batch(memory, multiplier=beam_width)\n\n            memory_seq_length = attn_kwargs['memory_sequence_length']\n            if memory_seq_length is not None:\n                attn_kwargs['memory_sequence_length'] = tile_batch(\n                    memory_seq_length, beam_width)\n\n            attn_modules = ['tensorflow.contrib.seq2seq', 'texar.custom']\n            bs_attention_mechanism = utils.check_or_get_instance(\n                self._hparams.attention.type, attn_kwargs, attn_modules,\n                classtype=tf.contrib.seq2seq.AttentionMechanism)\n\n            bs_attn_cell = AttentionWrapper(\n                self._cell._cell,\n                bs_attention_mechanism,\n                cell_input_fn=self._cell_input_fn,\n                **self._attn_cell_kwargs)\n\n            self._beam_search_cell = bs_attn_cell\n\n            return bs_attn_cell\n\n    def initialize(self, name=None):\n        helper_init = self._helper.initialize()\n\n        flat_initial_state = nest.flatten(self._initial_state)\n        dtype = flat_initial_state[0].dtype\n        initial_state = self._cell.zero_state(\n            batch_size=tf.shape(flat_initial_state[0])[0], dtype=dtype)\n        initial_state = initial_state.clone(cell_state=self._initial_state)\n\n        return [helper_init[0], helper_init[1], initial_state]\n\n    def step(self, time, inputs, state, name=None):\n        wrapper_outputs, wrapper_state = self._cell(inputs, state)\n        # Essentially the same as in BasicRNNDecoder.step()\n        logits = self._output_layer(wrapper_outputs)\n        sample_ids = self._helper.sample(\n            time=time, outputs=logits, state=wrapper_state)\n        (finished, next_inputs, next_state) = self._helper.next_inputs(\n            time=time,\n            outputs=logits,\n            state=wrapper_state,\n            sample_ids=sample_ids)\n\n        
attention_scores = wrapper_state.alignments\n        attention_context = wrapper_state.attention\n        outputs = AttentionRNNDecoderOutput(\n            logits, sample_ids, wrapper_outputs,\n            attention_scores, attention_context)\n\n        return (outputs, next_state, next_inputs, finished)\n\n    def finalize(self, outputs, final_state, sequence_lengths):\n        return outputs, final_state\n\n    def _alignments_size(self):\n        # Reimplementation of the alignments_size of each of\n        # AttentionWrapper.attention_mechanisms. The original implementation\n        # of `_BaseAttentionMechanism._alignments_size`:\n        #\n        #    self._alignments_size = (self._keys.shape[1].value or\n        #                       array_ops.shape(self._keys)[1])\n        #\n        # can be `None` when the seq length of encoder outputs are priori\n        # unknown.\n        alignments_size = []\n        for am in self._cell._attention_mechanisms:\n            az = (am._keys.shape[1].value or tf.shape(am._keys)[1:-1])\n            alignments_size.append(az)\n        return self._cell._item_or_tuple(alignments_size)\n\n    @property\n    def output_size(self):\n        return AttentionRNNDecoderOutput(\n            logits=self._rnn_output_size(),\n            sample_id=self._helper.sample_ids_shape,\n            cell_output=self._cell.output_size,\n            attention_scores=self._alignments_size(),\n            attention_context=self._cell.state_size.attention)\n\n    @property\n    def output_dtype(self):\n        \"\"\"Types of output of one step.\n        \"\"\"\n        # Assume the dtype of the cell is the output_size structure\n        # containing the input_state's first component's dtype.\n        # Return that structure and the sample_ids_dtype from the helper.\n        dtype = nest.flatten(self._initial_state)[0].dtype\n        return AttentionRNNDecoderOutput(\n            logits=nest.map_structure(lambda _: dtype, 
self._rnn_output_size()),\n            sample_id=self._helper.sample_ids_dtype,\n            cell_output=nest.map_structure(\n                lambda _: dtype, self._cell.output_size),\n            attention_scores=nest.map_structure(\n                lambda _: dtype, self._alignments_size()),\n            attention_context=nest.map_structure(\n                lambda _: dtype, self._cell.state_size.attention))\n\n    def zero_state(self, batch_size, dtype):\n        \"\"\"Returns zero state of the basic cell.\n        Equivalent to :attr:`decoder.cell._cell.zero_state`.\n        \"\"\"\n        return self._cell._cell.zero_state(batch_size=batch_size, dtype=dtype)\n\n    def wrapper_zero_state(self, batch_size, dtype):\n        \"\"\"Returns zero state of the attention-wrapped cell.\n        Equivalent to :attr:`decoder.cell.zero_state`.\n        \"\"\"\n        return self._cell.zero_state(batch_size=batch_size, dtype=dtype)\n\n    @property\n    def state_size(self):\n        \"\"\"The state size of the basic cell.\n        Equivalent to :attr:`decoder.cell._cell.state_size`.\n        \"\"\"\n        return self._cell._cell.state_size\n\n\n    @property\n    def wrapper_state_size(self):\n        \"\"\"The state size of the attention-wrapped cell.\n        Equivalent to :attr:`decoder.cell.state_size`.\n        \"\"\"\n        return self._cell.state_size\n\n"
  },
  {
    "path": "texar_repo/texar/modules/decoders/rnn_decoders_test.py",
    "content": "\"\"\"\nUnit tests for RNN decoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.modules.decoders.rnn_decoders import BasicRNNDecoderOutput\nfrom texar.modules.decoders.rnn_decoders import BasicRNNDecoder\nfrom texar.modules.decoders.rnn_decoders import AttentionRNNDecoderOutput\nfrom texar.modules.decoders.rnn_decoders import AttentionRNNDecoder\nfrom texar.modules.decoders.rnn_decoder_helpers import get_helper\nfrom texar import context\n\n# pylint: disable=no-member, too-many-locals, too-many-instance-attributes\n# pylint: disable=too-many-arguments, protected-access\n\nclass BasicRNNDecoderTest(tf.test.TestCase):\n    \"\"\"Tests :class:`~texar.modules.decoders.rnn_decoders.BasicRNNDecoder`.\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n        self._vocab_size = 4\n        self._max_time = 8\n        self._batch_size = 16\n        self._emb_dim = 20\n        self._inputs = tf.random_uniform(\n            [self._batch_size, self._max_time, self._emb_dim],\n            maxval=1., dtype=tf.float32)\n        self._embedding = tf.random_uniform(\n            [self._vocab_size, self._emb_dim], maxval=1., dtype=tf.float32)\n\n    def _test_outputs(self, decoder, outputs, final_state, sequence_lengths,\n                      test_mode=False):\n        # 4 trainable variables: cell-kernel, cell-bias,\n        # fc-layer-weights, fc-layer-bias\n        self.assertEqual(len(decoder.trainable_variables), 4)\n\n        cell_dim = decoder.hparams.rnn_cell.kwargs.num_units\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n\n            outputs_, final_state_, sequence_lengths_ = sess.run(\n                [outputs, final_state, sequence_lengths],\n                feed_dict={context.global_mode(): 
tf.estimator.ModeKeys.TRAIN})\n            self.assertIsInstance(outputs_, BasicRNNDecoderOutput)\n            if not test_mode:\n                self.assertEqual(\n                    outputs_.logits.shape,\n                    (self._batch_size, self._max_time, self._vocab_size))\n                self.assertEqual(\n                    outputs_.sample_id.shape,\n                    (self._batch_size, self._max_time))\n                np.testing.assert_array_equal(\n                    sequence_lengths_, [self._max_time]*self._batch_size)\n            self.assertEqual(final_state_[0].shape,\n                             (self._batch_size, cell_dim))\n\n    def test_decode_train(self):\n        \"\"\"Tests decoding in training mode.\n        \"\"\"\n        output_layer = tf.layers.Dense(self._vocab_size)\n        decoder = BasicRNNDecoder(vocab_size=self._vocab_size,\n                                  output_layer=output_layer)\n\n        helper_train = get_helper(\n            decoder.hparams.helper_train.type,\n            inputs=self._inputs,\n            sequence_length=[self._max_time]*self._batch_size,\n            **decoder.hparams.helper_train.kwargs.todict())\n        outputs, final_state, sequence_lengths = decoder(helper=helper_train)\n        self._test_outputs(decoder, outputs, final_state, sequence_lengths)\n\n        outputs, final_state, sequence_lengths = decoder(\n            inputs=self._inputs,\n            sequence_length=[self._max_time]*self._batch_size)\n        self._test_outputs(decoder, outputs, final_state, sequence_lengths)\n\n        outputs, final_state, sequence_lengths = decoder(\n            decoding_strategy=None,\n            inputs=self._inputs,\n            sequence_length=[self._max_time]*self._batch_size)\n        self._test_outputs(decoder, outputs, final_state, sequence_lengths)\n\n        outputs, final_state, sequence_lengths = decoder(\n            decoding_strategy=None,\n            embedding=self._embedding,\n         
   start_tokens=[1]*self._batch_size,\n            end_token=2,\n            mode=tf.estimator.ModeKeys.EVAL)\n        self._test_outputs(decoder, outputs, final_state, sequence_lengths,\n                           test_mode=True)\n\n    def test_decode_train_with_tf(self):\n        \"\"\"Compares decoding results with TF built-in decoder.\n        \"\"\"\n        _inputs_placeholder = tf.placeholder(\n            tf.int32, [self._batch_size, self._max_time], name=\"inputs\")\n        _embedding_placeholder = tf.placeholder(\n            tf.float32, [self._vocab_size, self._emb_dim], name=\"emb\")\n        inputs = tf.nn.embedding_lookup(_embedding_placeholder,\n                                        _inputs_placeholder)\n\n        output_layer = tf.layers.Dense(self._vocab_size)\n        decoder = BasicRNNDecoder(vocab_size=self._vocab_size,\n                                  output_layer=output_layer)\n\n        helper_train = get_helper(\n            decoder.hparams.helper_train.type,\n            inputs=inputs,\n            sequence_length=[self._max_time]*self._batch_size,\n            **decoder.hparams.helper_train.kwargs.todict())\n\n        outputs, final_state, sequence_lengths = decoder(helper=helper_train)\n\n        tf_helper = tf.contrib.seq2seq.TrainingHelper(\n            inputs, [self._max_time]*self._batch_size)\n\n        tf_decoder = tf.contrib.seq2seq.BasicDecoder(\n            decoder.cell,\n            tf_helper,\n            decoder.cell.zero_state(self._batch_size, tf.float32),\n            output_layer=output_layer)\n\n        tf_outputs, tf_final_state, tf_sequence_lengths = \\\n            tf.contrib.seq2seq.dynamic_decode(tf_decoder)\n\n        cell_dim = decoder.hparams.rnn_cell.kwargs.num_units\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            inputs_ = np.random.randint(\n                self._vocab_size, size=(self._batch_size, self._max_time),\n                
dtype=np.int32)\n            embedding_ = np.random.randn(self._vocab_size, self._emb_dim)\n\n            outputs_, final_state_, sequence_lengths_ = sess.run(\n                [outputs, final_state, sequence_lengths],\n                feed_dict={context.global_mode(): tf.estimator.ModeKeys.TRAIN,\n                           _inputs_placeholder: inputs_,\n                           _embedding_placeholder: embedding_})\n            self.assertEqual(final_state_[0].shape,\n                             (self._batch_size, cell_dim))\n\n            tf_outputs_, tf_final_state_, tf_sequence_lengths_ = sess.run(\n                [tf_outputs, tf_final_state, tf_sequence_lengths],\n                feed_dict={context.global_mode(): tf.estimator.ModeKeys.TRAIN,\n                           _inputs_placeholder: inputs_,\n                           _embedding_placeholder: embedding_})\n\n            np.testing.assert_array_equal(outputs_.logits,\n                                          tf_outputs_.rnn_output)\n            np.testing.assert_array_equal(outputs_.sample_id,\n                                          tf_outputs_.sample_id)\n            np.testing.assert_array_equal(final_state_.c, tf_final_state_.c)\n            np.testing.assert_array_equal(final_state_.h, tf_final_state_.h)\n            np.testing.assert_array_equal(sequence_lengths_,\n                                          tf_sequence_lengths_)\n\n    def test_decode_infer(self):\n        \"\"\"Tests decoding in inference mode.\n        \"\"\"\n        output_layer = tf.layers.Dense(self._vocab_size)\n        decoder = BasicRNNDecoder(vocab_size=self._vocab_size,\n                                  output_layer=output_layer)\n\n        helper_infer = get_helper(\n            decoder.hparams.helper_infer.type,\n            embedding=self._embedding,\n            start_tokens=[self._vocab_size-2]*self._batch_size,\n            end_token=self._vocab_size-1,\n            
**decoder.hparams.helper_train.kwargs.todict())\n\n        outputs, final_state, sequence_lengths = decoder(helper=helper_infer)\n\n        # 4 trainable variables: embedding, cell-kernel, cell-bias,\n        # fc-layer-weights, fc-layer-bias\n        self.assertEqual(len(decoder.trainable_variables), 4)\n\n        cell_dim = decoder.hparams.rnn_cell.kwargs.num_units\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_, final_state_, sequence_lengths_ = sess.run(\n                [outputs, final_state, sequence_lengths],\n                feed_dict={context.global_mode():\n                           tf.estimator.ModeKeys.PREDICT})\n            self.assertIsInstance(outputs_, BasicRNNDecoderOutput)\n            max_length = max(sequence_lengths_)\n            self.assertEqual(\n                outputs_.logits.shape,\n                (self._batch_size, max_length, self._vocab_size))\n            self.assertEqual(\n                outputs_.sample_id.shape, (self._batch_size, max_length))\n            self.assertEqual(final_state_[0].shape,\n                             (self._batch_size, cell_dim))\n\n\nclass AttentionRNNDecoderTest(tf.test.TestCase):\n    \"\"\"Tests :class:`~texar.modules.decoders.rnn_decoders.AttentionRNNDecoder`.\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n        self._vocab_size = 10\n        self._max_time = 16\n        self._batch_size = 8\n        self._emb_dim = 20\n        self._attention_dim = 256\n        self._inputs = tf.random_uniform(\n            [self._batch_size, self._max_time, self._emb_dim],\n            maxval=1., dtype=tf.float32)\n        self._embedding = tf.random_uniform(\n            [self._vocab_size, self._emb_dim], maxval=1., dtype=tf.float32)\n        self._encoder_output = tf.random_uniform(\n            [self._batch_size, self._max_time, 64])\n\n    def test_decode_train(self):\n        \"\"\"Tests decoding in training 
mode.\n        \"\"\"\n        seq_length = np.random.randint(\n            self._max_time, size=[self._batch_size]) + 1\n        encoder_values_length = tf.constant(seq_length)\n        hparams = {\n            \"attention\": {\n                \"kwargs\": {\n                    \"num_units\": self._attention_dim,\n                    # Note: to use sparsemax in TF-CPU, it looks\n                    # `memory_sequence_length` must equal max_time.\n                    #\"probability_fn\": \"sparsemax\"\n                }\n            }\n        }\n        decoder = AttentionRNNDecoder(\n            memory=self._encoder_output,\n            memory_sequence_length=encoder_values_length,\n            vocab_size=self._vocab_size,\n            hparams=hparams)\n\n        helper_train = get_helper(\n            decoder.hparams.helper_train.type,\n            inputs=self._inputs,\n            sequence_length=[self._max_time]*self._batch_size,\n            **decoder.hparams.helper_train.kwargs.todict())\n\n        outputs, final_state, sequence_lengths = decoder(helper=helper_train)\n        # 4+1 trainable variables: cell-kernel, cell-bias,\n        # fc-weight, fc-bias, and\n        # memory_layer: For LuongAttention, we only transform the memory layer;\n        # thus num_units *must* match the expected query depth.\n        self.assertEqual(len(decoder.trainable_variables), 5)\n\n        cell_dim = decoder.hparams.rnn_cell.kwargs.num_units\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_, final_state_, sequence_lengths_ = sess.run(\n                [outputs, final_state, sequence_lengths],\n                feed_dict={context.global_mode(): tf.estimator.ModeKeys.TRAIN})\n            self.assertIsInstance(outputs_, AttentionRNNDecoderOutput)\n            self.assertEqual(\n                outputs_.logits.shape,\n                (self._batch_size, self._max_time, self._vocab_size))\n            
self.assertEqual(\n                outputs_.sample_id.shape, (self._batch_size, self._max_time))\n            self.assertEqual(final_state_.cell_state[0].shape,\n                             (self._batch_size, cell_dim))\n            np.testing.assert_array_equal(\n                sequence_lengths_, [self._max_time]*self._batch_size)\n\n\n    def test_decode_infer(self):\n        \"\"\"Tests decoding in inference mode.\n        \"\"\"\n        seq_length = np.random.randint(\n            self._max_time, size=[self._batch_size]) + 1\n        encoder_values_length = tf.constant(seq_length)\n        hparams = {\n            \"attention\": {\n                \"kwargs\": {\n                    \"num_units\": 256,\n                }\n            }\n        }\n        decoder = AttentionRNNDecoder(\n            vocab_size=self._vocab_size,\n            memory=self._encoder_output,\n            memory_sequence_length=encoder_values_length,\n            hparams=hparams)\n\n        helper_infer = get_helper(\n            decoder.hparams.helper_infer.type,\n            embedding=self._embedding,\n            start_tokens=[1]*self._batch_size,\n            end_token=2,\n            **decoder.hparams.helper_train.kwargs.todict())\n\n        outputs, final_state, sequence_lengths = decoder(helper=helper_infer)\n\n        # 4+1 trainable variables: cell-kernel, cell-bias,\n        # fc-weight, fc-bias, and\n        # memory_layer: For LuongAttention, we only transform the memory layer;\n        # thus num_units *must* match the expected query depth.\n        self.assertEqual(len(decoder.trainable_variables), 5)\n        cell_dim = decoder.hparams.rnn_cell.kwargs.num_units\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_, final_state_, sequence_lengths_ = sess.run(\n                [outputs, final_state, sequence_lengths],\n                feed_dict={context.global_mode():\n                           
tf.estimator.ModeKeys.PREDICT})\n            self.assertIsInstance(outputs_, AttentionRNNDecoderOutput)\n            max_length = max(sequence_lengths_)\n            self.assertEqual(\n                outputs_.logits.shape,\n                (self._batch_size, max_length, self._vocab_size))\n            self.assertEqual(\n                outputs_.sample_id.shape, (self._batch_size, max_length))\n            self.assertEqual(final_state_.cell_state[0].shape,\n                             (self._batch_size, cell_dim))\n\n    def test_beam_search_cell(self):\n        \"\"\"Tests :meth:`texar.modules.AttentionRNNDecoder._get_beam_search_cell`\n        \"\"\"\n        seq_length = np.random.randint(\n            self._max_time, size=[self._batch_size]) + 1\n        encoder_values_length = tf.constant(seq_length)\n        hparams = {\n            \"attention\": {\n                \"kwargs\": {\n                    \"num_units\": self._attention_dim,\n                    \"probability_fn\": \"sparsemax\"\n                }\n            }\n        }\n        decoder = AttentionRNNDecoder(\n            memory=self._encoder_output,\n            memory_sequence_length=encoder_values_length,\n            vocab_size=self._vocab_size,\n            hparams=hparams)\n\n        helper_train = get_helper(\n            decoder.hparams.helper_train.type,\n            inputs=self._inputs,\n            sequence_length=[self._max_time]*self._batch_size,\n            **decoder.hparams.helper_train.kwargs.todict())\n\n        _, _, _ = decoder(helper=helper_train)\n\n        ## 4+1 trainable variables: cell-kernel, cell-bias,\n        ## fc-weight, fc-bias, and\n        ## memory_layer: For LuongAttention, we only transform the memory layer;\n        ## thus num_units *must* match the expected query depth.\n        self.assertEqual(len(decoder.trainable_variables), 5)\n\n        beam_width = 3\n        beam_cell = decoder._get_beam_search_cell(beam_width)\n        cell_input = 
tf.random_uniform([self._batch_size * beam_width,\n                                        self._emb_dim])\n        cell_state = beam_cell.zero_state(self._batch_size * beam_width,\n                                          tf.float32)\n        _ = beam_cell(cell_input, cell_state)\n        # Test if beam_cell is sharing variables with decoder cell.\n        self.assertEqual(len(beam_cell.trainable_variables), 0)\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/decoders/transformer_decoders.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nTransformer decoder.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=no-name-in-module, too-many-arguments, too-many-locals\n# pylint: disable=invalid-name\n\nimport collections\n\nimport tensorflow as tf\nfrom tensorflow.python.util import nest\n\nfrom texar.core import layers\nfrom texar.module_base import ModuleBase\nfrom texar.modules.networks.networks import FeedForwardNetwork\nfrom texar.modules.embedders.position_embedders import SinusoidsPositionEmbedder\nfrom texar.modules.encoders.transformer_encoders import \\\n    default_transformer_poswise_net_hparams\nfrom texar.modules.encoders.multihead_attention import \\\n    MultiheadAttentionEncoder\nfrom texar.utils import beam_search\nfrom texar.utils.shapes import shape_list, mask_sequences\nfrom texar.utils import transformer_attentions as attn\nfrom texar.utils.mode import is_train_mode\n\n__all__ = [\n    \"TransformerDecoderOutput\",\n    \"TransformerDecoder\"\n]\n\n\nclass TransformerDecoderOutput(\n        collections.namedtuple(\"TransformerDecoderOutput\",\n                               (\"logits\", \"sample_id\"))):\n    \"\"\"The output of :class:`TransformerDecoder`.\n\n    Attributes:\n        logits: A float Tensor of shape\n            `[batch_size, max_time, 
vocab_size]` containing the logits.\n        sample_id: An int Tensor of shape `[batch_size, max_time]`\n            containing the sampled token indexes.\n    \"\"\"\n\n\nclass TransformerDecoder(ModuleBase):\n    \"\"\"Transformer decoder that applies multi-head attention for\n    sequence decoding.\n    Stacked `~texar.modules.encoders.MultiheadAttentionEncoder` for\n    encoder-decoder attention and self attention,\n    `~texar.modules.FeedForwardNetwork` and residual connections.\n\n    Use the passed `embedding` variable as the parameters of the\n    transform layer from output to logits.\n\n    Args:\n        embedding: A Tensor of shape `[vocab_size, dim]` containing the\n            word embedding. The Tensor is used as the decoder output layer.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparameter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter structure and\n            default values.\n\n    .. document private functions\n    .. 
automethod:: _build\n    \"\"\"\n    def __init__(self, embedding, hparams=None):\n        ModuleBase.__init__(self, hparams)\n\n        with tf.variable_scope(self.variable_scope):\n            if self._hparams.initializer:\n                tf.get_variable_scope().set_initializer(\n                    layers.get_initializer(self._hparams.initializer))\n\n            self.position_embedder = \\\n                SinusoidsPositionEmbedder(\n                    self._hparams.position_embedder_hparams)\n\n            self._embedding = embedding\n            self._vocab_size = self._embedding.get_shape().as_list()[0]\n\n            self.output_layer = \\\n                self._build_output_layer(shape_list(self._embedding)[-1])\n            self.multihead_attentions = {\n                'self_att': [],\n                'encdec_att': []\n            }\n            self.poswise_networks = []\n            for i in range(self._hparams.num_blocks):\n                layer_name = 'layer_{}'.format(i)\n                with tf.variable_scope(layer_name):\n                    with tf.variable_scope(\"self_attention\"):\n                        multihead_attention = MultiheadAttentionEncoder(\n                            self._hparams.multihead_attention)\n                        self.multihead_attentions['self_att'].append(\n                            multihead_attention)\n                    # pylint: disable=protected-access\n                    if self._hparams.dim != \\\n                        multihead_attention._hparams.output_dim:\n                        raise ValueError('The output dimenstion of'\n                                         'MultiheadEncoder should be equal'\n                                         'to the dim of TransformerDecoder')\n\n                    with tf.variable_scope('encdec_attention'):\n                        multihead_attention = MultiheadAttentionEncoder(\n                            self._hparams.multihead_attention)\n                   
     self.multihead_attentions['encdec_att'].append(\n                            multihead_attention)\n                    if self._hparams.dim != \\\n                        multihead_attention._hparams.output_dim:\n                        raise ValueError('The output dimenstion of'\n                                         'MultiheadEncoder should be equal'\n                                         'to the dim of TransformerDecoder')\n\n                    poswise_network = FeedForwardNetwork(\n                        hparams=self._hparams['poswise_feedforward'])\n                    if self._hparams.dim != \\\n                        poswise_network._hparams.layers[-1]['kwargs']['units']:\n                        raise ValueError('The output dimenstion of'\n                                         'FeedForwardNetwork should be equal'\n                                         'to the dim of TransformerDecoder')\n                    self.poswise_networks.append(poswise_network)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                # Same as in TransformerEncoder\n                \"num_blocks\": 6,\n                \"dim\": 512,\n                \"position_embedder_hparams\": None,\n                \"embedding_dropout\": 0.1,\n                \"residual_dropout\": 0.1,\n                \"poswise_feedforward\": default_transformer_poswise_net_hparams,\n                \"multihead_attention\": {\n                    \"num_units\": 512,\n                    \"num_heads\": 8,\n                },\n                \"initializer\": None,\n                # Additional for TransformerDecoder\n                \"embedding_tie\": True,\n                \"output_layer_bias\": False,\n                \"max_decoding_length\": 1e10,\n                \"name\": \"transformer_decoder\"\n            }\n\n        Here:\n\n        \"num_blocks\" : int\n            Number of stacked blocks.\n\n        \"dim\" : int\n            Hidden dimension of the encoder.\n\n        \"position_embedder_hparams\" : dict, optional\n            Hyperparameters of a\n            :class:`~texar.modules.SinusoidsPositionEmbedder` as position\n            embedder. 
If `None`, the\n            :meth:`~texar.modules.SinusoidsPositionEmbedder.default_hparams`\n            is used.\n\n        \"embedding_dropout\": float\n            Dropout rate of the input word and position embeddings.\n\n        \"residual_dropout\" :  float\n            Dropout rate of the residual connections.\n\n        \"poswise_feedforward\" : dict,\n            Hyperparameters for a feed-forward network used in residual\n            connections.\n            Make sure the dimension of the output tensor is equal to `dim`.\n\n            See :func:`~texar.modules.default_transformer_poswise_net_hparams`\n            for details.\n\n        \"multihead_attention\": dict,\n            Hyperparameters for the multihead attention strategy.\n            Make sure the `output_dim` in this module is equal to `dim`.\n\n            See :func:\n                `~texar.modules.encoder.MultiheadAttentionEncoder.\n                default_harams` for details.\n            `\n        \"initializer\" : dict, optional\n            Hyperparameters of the default initializer that initializes\n            variables created in this module.\n            See :func:`~texar.core.get_initializer` for details.\n\n        \"embedding_tie\" : bool\n            Whether to use the word embedding matrix as the output layer\n            that computes logits. If `False`, an additional dense layer\n            is created.\n\n        \"output_layer_bias\" : bool\n            Whether to use bias to the output layer.\n\n        \"max_decoding_length\" : int\n            The maximum allowed number of decoding steps.\n            Set to a very large number of avoid the length constraint.\n            Ignored if provided in :meth:`_build` or\n            \"train_greedy\" decoding is used.\n\n            Length penalty coefficient. 
Refer to\n            https://arxiv.org/abs/1609.08144 for more details.\n\n        \"name\" : str\n            Name of the module.\n        \"\"\"\n        return {\n            \"num_blocks\": 6,\n            \"initializer\": None,\n            \"position_embedder_hparams\": None,\n            \"embedding_tie\": True,\n            \"output_layer_bias\": False,\n            \"max_decoding_length\": 1e10,\n            \"embedding_dropout\": 0.1,\n            \"residual_dropout\": 0.1,\n            \"poswise_feedforward\": default_transformer_poswise_net_hparams(),\n            'multihead_attention': {\n                'num_units': 512,\n                'dropout_rate': 0.1,\n                'output_dim': 512,\n                'num_heads': 8,\n            },\n            \"dim\": 512,\n            \"name\": \"transformer_decoder\",\n        }\n\n    def _prepare_tokens_to_embeds(self, tokens):\n        \"\"\" a callable function to transform tokens into embeddings.\"\"\"\n        token_emb = tf.nn.embedding_lookup(self._embedding, tokens)\n        return token_emb\n\n    def _symbols_to_logits_fn(self, embedding_fn, max_length):\n        \"\"\"Returns a function that accepts the decoded tokens and related\n        decoding status, and returns the logits of next token.\n        \"\"\"\n        positions = tf.expand_dims(tf.range(max_length, dtype=tf.int32), 0)\n        timing_signal = self.position_embedder(positions)\n        #you can use the comment to prevent the model to decode <UNK> token\n        #biases = np.ones([1, self._vocab_size])\n        #biases[0][3] = -np.inf\n        def _impl(ids, step, cache):\n            \"\"\"The function is called in dynamic decoding.\n\n            `ids` should be next_id of shape `[batch_size, decoded_lenth]`\n\n            Returned logits is of shape `[batch_size, vocab_size]`\n            \"\"\"\n            ids = ids[:, -1:]\n            inputs = embedding_fn(ids)\n            # Multiply embedding by sqrt of its dimention\n 
           inputs *= self._embedding.shape.as_list()[-1]**0.5\n            inputs += timing_signal[:, step:step+1]\n            outputs = self._self_attention_stack(\n                inputs,\n                memory=cache['memory'],\n                cache=cache,\n            )\n            logits = self.output_layer(outputs)\n            logits = tf.squeeze(logits, axis=[1])\n            #logits = tf.multiply(logits, biases)\n            return logits, cache\n\n        return _impl\n\n    def _build(self,    # pylint: disable=arguments-differ\n               memory,\n               memory_sequence_length=None,\n               memory_attention_bias=None,\n               inputs=None,\n               sequence_length=None,\n               decoding_strategy='train_greedy',\n               beam_width=1,\n               alpha=0,\n               start_tokens=None,\n               end_token=None,\n               max_decoding_length=None,\n               mode=None):\n        \"\"\"Performs decoding.\n\n        The decoder supports 4 decoding strategies. For the first 3 strategies,\n        set :attr:`decoding_strategy` to the respective string.\n\n        - **\"train_greedy\"**: decoding in teacher-forcing fashion \\\n          (i.e., feeding \\\n          ground truth to decode the next step), and for each step sample \\\n          is obtained by taking the `argmax` of logits. \\\n          Argument :attr:`inputs` is required for this strategy. 
\\\n          :attr:`sequence_length` is optional.\n        - **\"infer_greedy\"**: decoding in inference fashion (i.e., feeding \\\n          `generated` sample to decode the next step), and for each\n          step sample is obtained by taking the `argmax` of logits.\\\n          Arguments :attr:`(start_tokens, end_token)` are \\\n          required for this strategy, and argument \\\n          :attr:`max_decoding_length` is optional.\n        - **\"infer_sample\"**: decoding in inference fashion, and for each step\\\n          sample is obtained by `random sampling` from the logits.\n          Arguments :attr:`(start_tokens, end_token)` are \\\n          required for this strategy, and argument \\\n          :attr:`max_decoding_length` is optional.\n        - **Beam Search**: set :attr:`beam_width` to > 1 to use beam search \\\n          decoding.\\\n          Arguments :attr:`(start_tokens, end_token)` are \\\n          required, and argument \\\n          :attr:`max_decoding_length` is optional.\n\n        Args:\n            memory: The memory to attend, e.g., the output of an RNN encoder.\n                A Tensor of shape `[batch_size, memory_max_time, dim]`.\n            memory_sequence_length (optional): A Tensor of shape `[batch_size]`\n                containing the sequence lengths for the batch entries in\n                memory. Used to create attention bias of\n                :attr:`memory_attention_bias` is not given. Ignored if\n                `memory_attention_bias` is provided.\n            memory_attention_bias (optional): A Tensor of shape\n                `[batch_size, num_heads, memory_max_time, dim]`.\n                An attention bias typically sets the value of a padding\n                position to a large negative value for masking. 
If not given,\n                :attr:`memory_sequence_length` is used to automatically\n                create an attention bias.\n            inputs (optional): Input tensor for teacher forcing decoding, of\n                shape `[batch_size, target_max_time, emb_dim]` containing the\n                target sequence word embeddings.\n                Used when :attr:`decoding_strategy` is set to \"train_greedy\".\n            sequence_length (optional): A Tensor of shape `[batch_size]`,\n                containing the sequence length of :attr:`inputs`.\n                Tokens beyond the respective sequence length are masked out.\n                Used when :attr:`decoding_strategy` is set to\n                \"train_greedy\".\n            decoding_strategy (str): A string specifying the decoding\n                strategy, including \"train_greedy\", \"infer_greedy\",\n                \"infer_sample\".\n                Different arguments are required based on the\n                strategy. See above for details. 
Ignored if\n                :attr:`beam_width` > 1.\n            beam_width (int): Set to > 1 to use beam search.\n            alpha (float): Length penalty coefficient.\n                Refer to https://arxiv.org/abs/1609.08144\n                for more details.\n            start_tokens (optional): An int Tensor of shape `[batch_size]`,\n                containing the start tokens.\n                Used when `decoding_strategy` = \"infer_greedy\" or\n                \"infer_sample\", or `beam_width` > 1.\n            end_token (optional): An int 0D Tensor, the token that marks end\n                of decoding.\n                Used when `decoding_strategy` = \"infer_greedy\" or\n                \"infer_sample\", or `beam_width` > 1.\n            max_decoding_length (optional): An int scalar Tensor indicating\n                the maximum allowed number of decoding steps.\n                If `None` (default), use \"max_decoding_length\" defined in\n                :attr:`hparams`. Ignored in \"train_greedy\" decoding.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL`, and `PREDICT`. 
Controls dropout mode.\n                If `None` (default), :func:`texar.global_mode`\n                is used.\n\n        Returns:\n\n            - For **\"train_greedy\"** decoding, returns an instance of \\\n            :class:`~texar.modules.TransformerDecoderOutput` which contains\\\n            `sample_id` and `logits`.\n\n            - For **\"infer_greedy\"** and **\"infer_sample\"** decoding, returns\\\n            a tuple `(outputs, sequence_lengths)`, where `outputs` is an \\\n            instance of :class:`~texar.modules.TransformerDecoderOutput` as\\\n            in \"train_greedy\", and `sequence_lengths` is a Tensor of shape\\\n            `[batch_size]` containing the length of each sample.\n\n            - For **beam_search** decoding, returns a `dict` containing keys\\\n            \"sample_id\" and \"log_prob\".\n\n                - **\"sample_id\"** is an int Tensor of shape \\\n                `[batch_size, max_time, beam_width]` containing generated\\\n                token indexes. 
`sample_id[:,:,0]` is the highest-probable \\\n                sample.\n                - **\"log_porb\"** is a float Tensor of shape \\\n                `[batch_size, beam_width]` containing the log probability \\\n                of each sequence sample.\n        \"\"\"\n        if memory_attention_bias is None:\n            if memory_sequence_length is None:\n                raise ValueError(\n                    \"`memory_sequence_length` is required if \"\n                    \"`memory_attention_bias` is not given.\")\n\n            #enc_padding = 1 - mask_sequences(tf.ones_like(memory),\n            #                                 memory_sequence_length,\n            #                                 tensor_rank=3)[:, :, 0]\n            enc_padding = 1 - tf.sequence_mask(\n                memory_sequence_length, tf.shape(memory)[1], dtype=tf.float32)\n            memory_attention_bias = attn.attention_bias_ignore_padding(\n                enc_padding)\n\n        if beam_width <= 1 and decoding_strategy == 'train_greedy':\n            if sequence_length is not None:\n                inputs = mask_sequences(inputs, sequence_length, tensor_rank=3)\n\n            decoder_self_attention_bias = (\n                attn.attention_bias_lower_triangle(\n                    shape_list(inputs)[1]))\n            target_inputs = inputs * self._hparams.dim**0.5\n\n            _, lengths, _ = shape_list(target_inputs)\n            positions = tf.expand_dims(tf.range(lengths, dtype=tf.int32), 0)\n            pos_embeds = self.position_embedder(positions)\n\n            inputs = target_inputs + pos_embeds\n\n            decoder_output = self._self_attention_stack(\n                inputs,\n                memory,\n                decoder_self_attention_bias=decoder_self_attention_bias,\n                memory_attention_bias=memory_attention_bias,\n                cache=None,\n                mode=mode)\n            logits = self.output_layer(decoder_output)\n            
preds = tf.to_int32(tf.argmax(logits, axis=-1))\n            output = TransformerDecoderOutput(\n                logits=logits,\n                sample_id=preds\n            )\n            rets = output\n\n        else: # Inference decoding\n\n            if max_decoding_length is None:\n                max_decoding_length = self._hparams.max_decoding_length\n\n            if beam_width <= 1:\n                logits, preds, sequence_length = self._infer_decoding(\n                    self._prepare_tokens_to_embeds,\n                    start_tokens,\n                    end_token,\n                    decode_length=max_decoding_length,\n                    memory=memory,\n                    memory_attention_bias=memory_attention_bias,\n                    decoding_strategy=decoding_strategy,\n                )\n                output = TransformerDecoderOutput(\n                    logits=logits,\n                    sample_id=preds)\n                rets = output, sequence_length\n            else:\n                # The output format is different when running beam search\n                sample_id, log_prob = self._beam_decode(\n                    self._prepare_tokens_to_embeds,\n                    start_tokens,\n                    end_token,\n                    beam_width=beam_width,\n                    alpha=alpha,\n                    decode_length=max_decoding_length,\n                    memory=memory,\n                    memory_attention_bias=memory_attention_bias,\n                )\n                predictions = {\n                    'sample_id': sample_id,\n                    'log_prob': log_prob\n                }\n                rets = predictions\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            self._built = True\n\n        return rets\n\n    def _self_attention_stack(self,\n                              inputs,\n                              memory,\n                              
decoder_self_attention_bias=None,\n                              memory_attention_bias=None,\n                              cache=None,\n                              mode=None):\n        \"\"\"Stacked multihead attention module.\n        \"\"\"\n        inputs = tf.layers.dropout(inputs,\n                                   rate=self._hparams.embedding_dropout,\n                                   training=is_train_mode(mode))\n        if cache is not None:\n            memory_attention_bias = \\\n                cache['memory_attention_bias']\n        else:\n            assert decoder_self_attention_bias is not None\n\n        x = inputs\n        for i in range(self._hparams.num_blocks):\n            layer_name = 'layer_{}'.format(i)\n            layer_cache = cache[layer_name] if cache is not None else None\n            with tf.variable_scope(layer_name):\n                with tf.variable_scope(\"self_attention\"):\n                    multihead_attention = \\\n                        self.multihead_attentions['self_att'][i]\n                    selfatt_output = multihead_attention(\n                        queries=layers.layer_normalize(x),\n                        memory=None,\n                        memory_attention_bias=decoder_self_attention_bias,\n                        cache=layer_cache,\n                        mode=mode,\n                    )\n                    x = x + tf.layers.dropout(\n                        selfatt_output,\n                        rate=self._hparams.residual_dropout,\n                        training=is_train_mode(mode),\n                    )\n                if memory is not None:\n                    with tf.variable_scope('encdec_attention'):\n                        multihead_attention = \\\n                            self.multihead_attentions['encdec_att'][i]\n                        encdec_output = multihead_attention(\n                            queries=layers.layer_normalize(x),\n                            
memory=memory,\n                            memory_attention_bias=memory_attention_bias,\n                            mode=mode,\n                        )\n                        x = x + tf.layers.dropout(\n                            encdec_output,\n                            rate=self._hparams.residual_dropout,\n                            training=is_train_mode(mode))\n                poswise_network = self.poswise_networks[i]\n                with tf.variable_scope('past_poswise_ln'):\n                    sub_output = tf.layers.dropout(\n                        poswise_network(layers.layer_normalize(x)),\n                        rate=self._hparams.residual_dropout,\n                        training=is_train_mode(mode),\n                    )\n                    x = x + sub_output\n\n        return layers.layer_normalize(x)\n\n    def _build_output_layer(self, dim):\n        if self._hparams.embedding_tie:\n            if self._hparams.output_layer_bias:\n                with tf.variable_scope(self.variable_scope):\n                    affine_bias = tf.get_variable(\n                        'affine_bias', [self._vocab_size])\n            else:\n                affine_bias = None\n\n            def _outputs_to_logits(outputs):\n                shape = shape_list(outputs)\n                outputs = tf.reshape(outputs, [-1, dim])\n                logits = tf.matmul(outputs, self._embedding, transpose_b=True)\n                if affine_bias is not None:\n                    logits += affine_bias\n                logits = tf.reshape(logits, shape[:-1] + [self._vocab_size])\n                return logits\n\n            return _outputs_to_logits\n        else:\n            layer = tf.layers.Dense(\n                self._vocab_size,\n                use_bias=self._hparams.output_layer_bias)\n            layer.build([None, dim])\n            return layer\n\n    def _init_cache(self, memory, memory_attention_bias):\n        cache = {\n            'memory': memory,\n   
         'memory_attention_bias': memory_attention_bias,\n        }\n        batch_size = tf.shape(memory)[0]\n        depth = self._hparams.multihead_attention.num_units\n        for l in range(self._hparams.num_blocks):\n            cache['layer_{}'.format(l)] = {\n                'self_keys': tf.zeros([batch_size, 0, depth]),\n                'self_values': tf.zeros([batch_size, 0, depth]),\n                'memory_keys': tf.zeros([batch_size, 0, depth]),\n                'memory_values': tf.zeros([batch_size, 0, depth]),\n            }\n        return cache\n\n    def _infer_decoding(self,\n                        embedding_fn,\n                        start_tokens,\n                        end_token,\n                        decode_length,\n                        memory,\n                        memory_attention_bias,\n                        decoding_strategy):\n        \"\"\"Performs \"infer_greedy\" or \"infer_sample\" decoding.\n        \"\"\"\n        batch_size = tf.shape(start_tokens)[0]\n        finished = tf.fill([batch_size], False)\n        seq_length = tf.zeros([batch_size], dtype=tf.int32)\n        step = tf.constant(0)\n        decoded_ids = tf.zeros([batch_size, 0], dtype=tf.int32)\n        logits_list = tf.zeros([batch_size, 0, self._vocab_size],\n                               dtype=tf.float32)\n        next_id = tf.expand_dims(start_tokens, 1)\n\n        cache = self._init_cache(memory, memory_attention_bias)\n        symbols_to_logits_fn = self._symbols_to_logits_fn(\n            embedding_fn,\n            max_length=decode_length+1\n        )\n\n        def _body(step, finished, next_id, decoded_ids, cache, logits_list,\n                  seq_length):\n            logits, cache = symbols_to_logits_fn(next_id, step, cache)\n\n            if decoding_strategy == 'infer_greedy':\n                next_id = tf.argmax(logits, -1, output_type=tf.int32)\n            elif decoding_strategy == 'infer_sample':\n                sample_id_sampler = 
tf.distributions.Categorical(logits=logits)\n                next_id = sample_id_sampler.sample()\n\n            cur_finished = tf.equal(next_id, end_token)\n\n            update_len = tf.logical_and(\n                tf.logical_not(finished),\n                cur_finished)\n            seq_length = tf.where(\n                update_len,\n                tf.fill(tf.shape(seq_length), step+1),\n                seq_length)\n\n            next_id = tf.expand_dims(next_id, axis=1)\n\n            finished |= cur_finished\n\n            # Keep the shape as [batch_size, seq_len]\n            logits = tf.expand_dims(logits, axis=1)\n            logits_list = tf.concat([logits_list, logits], axis=1)\n            decoded_ids = tf.concat([decoded_ids, next_id], axis=1)\n\n            return step+1, finished, next_id, decoded_ids, cache, \\\n                    logits_list, seq_length\n\n        def _not_finished(i, finished, *_):\n            return (i < decode_length) & tf.logical_not(tf.reduce_all(finished))\n\n        _, _, _, decoded_ids, _, logits_list, seq_length = tf.while_loop(\n            _not_finished,\n            _body,\n            loop_vars=(step, finished, next_id, decoded_ids, cache, logits_list,\n                       seq_length),\n            shape_invariants=(\n                tf.TensorShape([]),\n                tf.TensorShape([None]),\n                tf.TensorShape([None, None]),\n                tf.TensorShape([None, None]),\n                nest.map_structure(beam_search.get_state_shape_invariants,\n                                   cache),\n                tf.TensorShape([None, None, None]),\n                tf.TensorShape([None])\n                )\n            )\n\n        return logits_list, decoded_ids, seq_length\n\n    def _beam_decode(self,\n                     embedding_fn,\n                     start_tokens,\n                     end_token,\n                     memory,\n                     memory_attention_bias,\n                     
decode_length=256,\n                     beam_width=5,\n                     alpha=0.6):\n        cache = self._init_cache(memory, memory_attention_bias)\n        symbols_to_logits_fn = self._symbols_to_logits_fn(\n            embedding_fn,\n            max_length=decode_length+1)\n        outputs, log_prob = beam_search.beam_search(\n            symbols_to_logits_fn,\n            start_tokens,\n            beam_width,\n            decode_length,\n            self._vocab_size,\n            alpha,\n            states=cache,\n            eos_id=end_token)\n\n        # Ignores <BOS>\n        outputs = outputs[:, :, 1:]\n        # shape = [batch_size, seq_length, beam_width]\n        outputs = tf.transpose(outputs, [0, 2, 1])\n        return (outputs, log_prob)\n"
  },
  {
    "path": "texar_repo/texar/modules/decoders/transformer_decoders_test.py",
    "content": "#\n\"\"\"\nUnit tests for Transformer decodre.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nfrom texar.modules.decoders.transformer_decoders import TransformerDecoder\nfrom texar.modules.decoders.transformer_decoders import TransformerDecoderOutput\n\n# pylint: disable=too-many-instance-attributes\n\n\nclass TransformerDecoderTest(tf.test.TestCase):\n    \"\"\"Tests :class:`~texar.modules.TransformerDecoder`\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n        self._vocab_size = 15\n        self._batch_size = 6\n        self._max_time = 10\n        self._emb_dim = 512\n        self._max_decode_len = 32\n        self._inputs = tf.random_uniform(\n            [self._batch_size, self._max_time, self._emb_dim],\n            maxval=1, dtype=tf.float32)\n        self._memory = tf.random_uniform(\n            [self._batch_size, self._max_time, self._emb_dim],\n            maxval=1, dtype=tf.float32)\n        self._memory_sequence_length = tf.random_uniform(\n            [self._batch_size], maxval=self._max_time, dtype=tf.int32)\n        self._embedding = tf.random_uniform(\n            [self._vocab_size, self._emb_dim], maxval=1, dtype=tf.float32)\n        self._start_tokens = tf.fill([self._batch_size], 1)\n        self.max_decoding_length = self._max_time\n\n    def test_train(self):\n        \"\"\"Tests train_greedy\n        \"\"\"\n        decoder = TransformerDecoder(embedding=self._embedding)\n        # 6 blocks\n        # -self multihead_attention: 4 dense without bias + 2 layer norm vars\n        # -encdec multihead_attention: 4 dense without bias + 2 layer norm vars\n        # -poswise_network: Dense with bias, Dense with bias + 2 layer norm vars\n        # 2 layer norm vars\n        outputs = decoder(memory=self._memory,\n                          
memory_sequence_length=self._memory_sequence_length,\n                          memory_attention_bias=None,\n                          inputs=self._inputs,\n                          decoding_strategy='train_greedy',\n                          mode=tf.estimator.ModeKeys.TRAIN)\n        self.assertEqual(len(decoder.trainable_variables), 110)\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_ = sess.run(outputs)\n\n            self.assertIsInstance(outputs_, TransformerDecoderOutput)\n\n    def test_infer_greedy(self):\n        \"\"\"Tests train_greedy\n        \"\"\"\n        decoder = TransformerDecoder(embedding=self._embedding)\n        outputs, length = decoder(\n            memory=self._memory,\n            memory_sequence_length=self._memory_sequence_length,\n            memory_attention_bias=None,\n            inputs=None,\n            decoding_strategy='infer_greedy',\n            beam_width=1,\n            start_tokens=self._start_tokens,\n            end_token=2,\n            max_decoding_length=self._max_decode_len,\n            mode=tf.estimator.ModeKeys.PREDICT)\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_ = sess.run(outputs)\n            self.assertIsInstance(outputs_, TransformerDecoderOutput)\n\n    def test_infer_sample(self):\n        \"\"\"Tests infer_sample\n        \"\"\"\n        decoder = TransformerDecoder(embedding=self._embedding)\n        outputs, length = decoder(\n            memory=self._memory,\n            memory_sequence_length=self._memory_sequence_length,\n            memory_attention_bias=None,\n            inputs=None,\n            decoding_strategy='infer_sample',\n            beam_width=1,\n            start_tokens=self._start_tokens,\n            end_token=2,\n            max_decoding_length=self._max_decode_len,\n            mode=tf.estimator.ModeKeys.PREDICT)\n        with 
self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_ = sess.run(outputs)\n            self.assertIsInstance(outputs_, TransformerDecoderOutput)\n\n\n    def test_beam_search(self):\n        \"\"\"Tests beam_search\n        \"\"\"\n        decoder = TransformerDecoder(embedding=self._embedding)\n        outputs = decoder(\n            memory=self._memory,\n            memory_sequence_length=self._memory_sequence_length,\n            memory_attention_bias=None,\n            inputs=None,\n            beam_width=5,\n            start_tokens=self._start_tokens,\n            end_token=2,\n            max_decoding_length=self._max_decode_len,\n            mode=tf.estimator.ModeKeys.PREDICT)\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_ = sess.run(outputs)\n            self.assertEqual(outputs_['log_prob'].shape,\n                             (self._batch_size, 5))\n            self.assertEqual(outputs_['sample_id'].shape,\n                             (self._batch_size, self._max_decode_len, 5))\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/embedders/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library embedders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.modules.embedders.embedder_base import *\nfrom texar.modules.embedders.embedders import *\nfrom texar.modules.embedders.position_embedders import *\n"
  },
  {
    "path": "texar_repo/texar/modules/embedders/embedder_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nThe base embedder class.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.module_base import ModuleBase\nfrom texar.modules.embedders import embedder_utils\n\n# pylint: disable=invalid-name\n\n__all__ = [\n    \"EmbedderBase\"\n]\n\nclass EmbedderBase(ModuleBase):\n    \"\"\"The base embedder class that all embedder classes inherit.\n\n    Args:\n        num_embeds (int, optional): The number of embedding elements, e.g.,\n            the vocabulary size of a word embedder.\n        hparams (dict or HParams, optional): Embedder hyperparameters. Missing\n            hyperparamerter will be set to default values. 
See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n    \"\"\"\n\n    def __init__(self, num_embeds=None, hparams=None):\n        ModuleBase.__init__(self, hparams)\n\n        self._num_embeds = num_embeds\n\n    # pylint: disable=attribute-defined-outside-init\n    def _init_parameterized_embedding(self, init_value, num_embeds, hparams):\n        self._embedding = embedder_utils.get_embedding(\n            hparams, init_value, num_embeds, self.variable_scope)\n        if hparams.trainable:\n            self._add_trainable_variable(self._embedding)\n\n        self._num_embeds = self._embedding.get_shape().as_list()[0]\n\n        self._dim = self._embedding.get_shape().as_list()[1:]\n        self._dim_rank = len(self._dim)\n        if self._dim_rank == 1:\n            self._dim = self._dim[0]\n\n    def _get_dropout_layer(self, hparams, ids_rank=None, dropout_input=None,\n                           dropout_strategy=None):\n        \"\"\"Creates dropout layer according to dropout strategy.\n\n        Called in :meth:`_build()`.\n        \"\"\"\n        dropout_layer = None\n\n        st = dropout_strategy\n        st = hparams.dropout_strategy if st is None else st\n\n        if hparams.dropout_rate > 0.:\n            if st == 'element':\n                noise_shape = None\n            elif st == 'item':\n                noise_shape = tf.concat([tf.shape(dropout_input)[:ids_rank],\n                                         tf.ones([self._dim_rank], tf.int32)],\n                                        axis=0)\n            elif st == 'item_type':\n                noise_shape = [None] + [1] * self._dim_rank\n            else:\n                raise ValueError('Unknown dropout strategy: {}'.format(st))\n\n            dropout_layer = tf.layers.Dropout(\n                rate=hparams.dropout_rate, noise_shape=noise_shape)\n\n        return dropout_layer\n\n    @staticmethod\n    def default_hparams():\n        
\"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"name\": \"embedder\"\n            }\n        \"\"\"\n        return {\n            \"name\": \"embedder\"\n        }\n\n    def _build(self, *args, **kwargs):\n        raise NotImplementedError\n\n    @property\n    def num_embeds(self):\n        \"\"\"The number of embedding elements.\n        \"\"\"\n        return self._num_embeds\n"
  },
  {
    "path": "texar_repo/texar/modules/embedders/embedder_utils.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Utils of embedder.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport tensorflow as tf\n\nfrom texar.hyperparams import HParams\nfrom texar.core import layers\n\n__all__ = [\n    \"default_embedding_hparams\",\n    \"get_embedding\",\n    \"soft_embedding_lookup\"\n]\n\ndef default_embedding_hparams():\n    \"\"\"Returns a `dict` of hyperparameters and default values of a embedder.\n\n     See :meth:`~texar.modules.WordEmbedder.default_hparams` for details.\n\n        .. code-block:: python\n\n            {\n                \"name\": \"embedding\",\n                \"dim\": 100,\n                \"initializer\": None,\n                \"regularizer\": {\n                    \"type\": \"L1L2\",\n                    \"kwargs\": {\n                        \"l1\": 0.,\n                        \"l2\": 0.\n                    }\n                },\n                \"dropout_rate\": 0.,\n                \"dropout_strategy\": 'element',\n                \"trainable\": True,\n            }\n\n        Here:\n\n        \"name\" : str\n            Name of the embedding variable.\n\n        \"dim\" : int or list\n            Embedding dimension. 
Can be a list of integers to yield embeddings\n            with dimensionality > 1.\n\n        \"initializer\" : dict or None\n            Hyperparameters of the initializer for the embedding values. An\n            example is as\n\n            .. code-block:: python\n\n                {\n                    \"type\": \"random_uniform_initializer\",\n                    \"kwargs\": {\n                        \"minval\": -0.1,\n                        \"maxval\": 0.1,\n                        \"seed\": None\n                    }\n                }\n\n            which corresponds to :tf_main:`tf.random_uniform_initializer\n            <random_uniform_initializer>`, and includes:\n\n            \"type\" : str or initializer instance\n                Name, full path, or instance of the initializer class; Or name\n                or full path to a function that returns the initializer class.\n                The class or function can be\n\n                - Built-in initializer defined in \\\n                  :tf_main:`tf.initializers <initializers>`, e.g., \\\n                  :tf_main:`random_uniform <random_uniform_initializer>` \\\n                  (a.k.a :class:`tf.random_uniform_initializer`), or \\\n                  in :mod:`tf`, e.g., :tf_main:`glorot_uniform_initializer \\\n                  <glorot_uniform_initializer>`, or in \\\n                  :tf_main:`tf.keras.initializers <keras/initializers>`.\n                - User-defined initializer in :mod:`texar.custom`.\n                - External initializer. Must provide the full path, \\\n                  e.g., :attr:`\"my_module.MyInitializer\"`, or the instance.\n\n            \"kwargs\" : dict\n                A dictionary of arguments for constructor of the\n                initializer class or for the function. 
An initializer is\n                created by `initialzier = initializer_class_or_fn(**kwargs)`\n                where :attr:`initializer_class_or_fn` is specified in\n                :attr:`\"type\"`.\n                Ignored if :attr:`\"type\"` is an initializer instance.\n\n        \"regularizer\" : dict\n            Hyperparameters of the regularizer for the embedding values. The\n            regularizer must be an instance of\n            the base :tf_main:`Regularizer <keras/regularizers/Regularizer>`\n            class. The hyperparameters include:\n\n            \"type\" : str or Regularizer instance\n                Name, full path, or instance of the regularizer class. The\n                class can be\n\n                - Built-in regularizer defined in\n                  :tf_main:`tf.keras.regularizers <keras/regularizers>`, e.g.,\n                  :tf_main:`L1L2 <keras/regularizers/L1L2>`.\n                - User-defined regularizer in :mod:`texar.custom`. The\n                  regularizer class should inherit the base class\n                  :tf_main:`Regularizer <keras/regularizers/Regularizer>`.\n                - External regularizer. Must provide the full path, \\\n                  e.g., :attr:`\"my_module.MyRegularizer\"`, or the instance.\n\n            \"kwargs\" : dict\n                A dictionary of arguments for constructor of the\n                regularizer class. A regularizer is created by\n                calling `regularizer_class(**kwargs)` where\n                :attr:`regularizer_class` is specified in :attr:`\"type\"`.\n                Ignored if :attr:`\"type\"` is a Regularizer instance.\n\n            The default value corresponds to\n            :tf_main:`L1L2 <keras/regularizers/L1L2>` with `(l1=0, l2=0)`,\n            which disables regularization.\n\n        \"dropout_rate\" : float\n            The dropout rate between 0 and 1. 
E.g., `dropout_rate=0.1` would\n            drop out 10% of the embedding.\n\n        \"dropout_strategy\" : str\n            The dropout strategy. Can be one of the following\n\n            - 'element': The regular strategy that drops individual elements \\\n              in the embedding vectors.\n            - 'item': Drops individual items (e.g., words) entirely. E.g., for \\\n              the word sequence 'the simpler the better', the strategy can \\\n              yield '_ simpler the better', where the first `the` is dropped.\n            - 'item_type': Drops item types (e.g., word types). E.g., for the \\\n              above sequence, the strategy can yield '_ simpler _ better', \\\n              where the word type 'the' is dropped. The dropout will never \\\n              yield '_ simpler the better' as in the 'item' strategy.\n\n        \"trainable\" : bool\n            Whether the embedding is trainable.\n    \"\"\"\n    return {\n        \"name\": \"embedding\",\n        \"dim\": 100,\n        \"initializer\": None,\n        \"regularizer\": layers.default_regularizer_hparams(),\n        \"dropout_rate\": 0.,\n        \"dropout_strategy\": 'element',\n        \"trainable\": True,\n        \"@no_typecheck\": [\"dim\"]\n    }\n\n\ndef get_embedding(hparams=None,\n                  init_value=None,\n                  num_embeds=None,\n                  variable_scope='Embedding'):\n    \"\"\"Creates embedding variable if not exists.\n\n    Args:\n        hparams (dict or HParams, optional): Embedding hyperparameters. Missing\n            hyperparameters are set to default values. See\n            :func:`~texar.modules.default_embedding_hparams`\n            for all hyperparameters and default values.\n\n            If :attr:`init_value` is given, :attr:`hparams[\"initializer\"]`,\n            and :attr:`hparams[\"dim\"]` are ignored.\n        init_value (Tensor or numpy array, optional): Initial values of the\n            embedding variable. 
If not given, embedding is initialized as\n            specified in :attr:`hparams[\"initializer\"]`.\n        num_embeds (int, optional): The number of embedding items\n            (e.g., vocabulary size). Required if :attr:`init_value` is\n            not provided.\n        variable_scope (str or VariableScope, optional): Variable scope of\n            the embedding variable.\n\n    Returns:\n        Variable or Tensor: A 2D `Variable` or `Tensor` of the same shape with\n        :attr:`init_value` or of the shape\n        :attr:`[num_embeds, hparams[\"dim\"]]`.\n    \"\"\"\n    with tf.variable_scope(variable_scope):\n        if hparams is None or isinstance(hparams, dict):\n            hparams = HParams(hparams, default_embedding_hparams())\n        regularizer = layers.get_regularizer(hparams[\"regularizer\"])\n        if init_value is None:\n            initializer = layers.get_initializer(hparams[\"initializer\"])\n            dim = hparams[\"dim\"]\n            if not isinstance(hparams[\"dim\"], (list, tuple)):\n                dim = [dim]\n            embedding = tf.get_variable(name='w',\n                                        shape=[num_embeds] + dim,\n                                        initializer=initializer,\n                                        regularizer=regularizer,\n                                        trainable=hparams[\"trainable\"])\n        else:\n            embedding = tf.get_variable(name='w',\n                                        initializer=tf.to_float(init_value),\n                                        regularizer=regularizer,\n                                        trainable=hparams[\"trainable\"])\n\n        return embedding\n\ndef soft_embedding_lookup(embedding, soft_ids):\n    \"\"\"Transforms soft ids (e.g., probability distribution over ids) into\n    embeddings, by mixing the embedding vectors with the soft weights.\n\n    Args:\n        embedding: A Tensor of shape `[num_classes] + embedding-dim` containing\n  
          the embedding vectors. Embedding can have dimensionality > 1, i.e.,\n            :attr:`embedding` can be of shape\n            `[num_classes, emb_dim_1, emb_dim_2, ...]`\n        soft_ids: A Tensor of weights (probabilities) used to mix the\n            embedding vectors.\n\n    Returns:\n        A Tensor of shape `shape(soft_ids)[:-1] + shape(embedding)[1:]`. For\n        example, if `shape(soft_ids) = [batch_size, max_time, vocab_size]`\n        and `shape(embedding) = [vocab_size, emb_dim]`, then the return tensor\n        has shape `[batch_size, max_time, emb_dim]`.\n\n    Example::\n\n        decoder_outputs, ... = decoder(...)\n        soft_seq_emb = soft_embedding_lookup(\n            embedding, tf.nn.softmax(decoder_outputs.logits))\n    \"\"\"\n    return tf.tensordot(tf.to_float(soft_ids), embedding, [-1, 0])\n"
  },
  {
    "path": "texar_repo/texar/modules/embedders/embedder_utils_test.py",
    "content": "#\n\"\"\"\nUnit tests for embedder utils.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\n# pylint: disable=no-member\n\nimport tensorflow as tf\n\nfrom texar.modules.embedders import embedder_utils\n\nclass GetEmbeddingTest(tf.test.TestCase):\n    \"\"\"Tests embedding creator.\n    \"\"\"\n    def test_get_embedding(self):\n        \"\"\"Tests :func:`~texar.modules.embedder.embedder_utils.get_embedding`.\n        \"\"\"\n        vocab_size = 100\n        emb = embedder_utils.get_embedding(num_embeds=vocab_size)\n        self.assertEqual(emb.shape[0].value, vocab_size)\n        self.assertEqual(emb.shape[1].value,\n                         embedder_utils.default_embedding_hparams()[\"dim\"])\n\n        hparams = {\n            \"initializer\": {\n                \"type\": tf.random_uniform_initializer(minval=-0.1, maxval=0.1)\n            },\n            \"regularizer\": {\n                \"type\": tf.keras.regularizers.L1L2(0.1, 0.1)\n            }\n        }\n        emb = embedder_utils.get_embedding(\n            hparams=hparams, num_embeds=vocab_size,\n            variable_scope='embedding_2')\n        self.assertEqual(emb.shape[0].value, vocab_size)\n        self.assertEqual(emb.shape[1].value,\n                         embedder_utils.default_embedding_hparams()[\"dim\"])\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/embedders/embedders.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious embedders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.modules.embedders.embedder_base import EmbedderBase\nfrom texar.modules.embedders import embedder_utils\nfrom texar.utils.mode import is_train_mode\nfrom texar.utils.shapes import get_rank\n\n__all__ = [\n    \"WordEmbedder\"\n]\n\nclass WordEmbedder(EmbedderBase):\n    \"\"\"Simple word embedder that maps indexes into embeddings. The indexes\n    can be soft (e.g., distributions over vocabulary).\n\n    Either :attr:`init_value` or :attr:`vocab_size` is required. If both are\n    given, there must be `init_value.shape[0]==vocab_size`.\n\n    Args:\n        init_value (optional): A `Tensor` or numpy array that contains the\n            initial value of embeddings. It is typically of shape\n            `[vocab_size] + embedding-dim`. Embedding can have dimensionality\n            > 1.\n\n            If `None`, embedding is initialized as specified in\n            :attr:`hparams[\"initializer\"]`. Otherwise, the\n            :attr:`\"initializer\"` and :attr:`\"dim\"`\n            hyperparameters in :attr:`hparams` are ignored.\n        vocab_size (int, optional): The vocabulary size. 
Required if\n            :attr:`init_value` is not given.\n        hparams (dict, optional): Embedder hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    See :meth:`_build` for the inputs and outputs of the embedder.\n\n    Example:\n\n        .. code-block:: python\n\n            ids = tf.random_uniform(shape=[32, 10], maxval=10, dtype=tf.int64)\n            soft_ids = tf.random_uniform(shape=[32, 10, 100])\n\n            embedder = WordEmbedder(vocab_size=100, hparams={'dim': 256})\n            ids_emb = embedder(ids=ids) # shape: [32, 10, 256]\n            soft_ids_emb = embedder(soft_ids=soft_ids) # shape: [32, 10, 256]\n\n        .. code-block:: python\n\n            ## Use with Texar data module\n            hparams={\n                'dataset': {\n                    'embedding_init': {'file': 'word2vec.txt'}\n                    ...\n                },\n            }\n            data = MonoTextData(data_params)\n            iterator = DataIterator(data)\n            batch = iterator.get_next()\n\n            # Use data vocab size\n            embedder_1 = WordEmbedder(vocab_size=data.vocab.size)\n            emb_1 = embedder_1(batch['text_ids'])\n\n            # Use pre-trained embedding\n            embedder_2 = WordEmbedder(init_value=data.embedding_init_value)\n            emb_2 = embedder_2(batch['text_ids'])\n\n\n    .. document private functions\n    .. 
automethod:: _build\n    \"\"\"\n\n    def __init__(self, init_value=None, vocab_size=None, hparams=None):\n        EmbedderBase.__init__(self, hparams=hparams)\n\n        if init_value is None and vocab_size is None:\n            raise ValueError(\n                \"Either `init_value` or `vocab_size` is required.\")\n\n        self._init_parameterized_embedding(init_value, vocab_size,\n                                           self._hparams)\n\n        self._vocab_size = vocab_size\n        if vocab_size is None:\n            self._vocab_size = self._num_embeds\n        if self._vocab_size != self._num_embeds:\n            raise ValueError(\n                'vocab_size must equal to init_value.shape[0].'\n                'Got %d and %d' % (self._vocab_size, self._num_embeds))\n\n        self._built = True\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"dim\": 100,\n                \"dropout_rate\": 0,\n                \"dropout_strategy\": 'element',\n                \"trainable\": True,\n                \"initializer\": {\n                    \"type\": \"random_uniform_initializer\",\n                    \"kwargs\": {\n                        \"minval\": -0.1,\n                        \"maxval\": 0.1,\n                        \"seed\": None\n                    }\n                },\n                \"regularizer\": {\n                    \"type\": \"L1L2\",\n                    \"kwargs\": {\n                        \"l1\": 0.,\n                        \"l2\": 0.\n                    }\n                },\n                \"name\": \"word_embedder\",\n            }\n\n        Here:\n\n        \"dim\" : int or list\n            Embedding dimension. 
Can be a list of integers to yield embeddings\n            with dimensionality > 1.\n\n            Ignored if :attr:`init_value` is given to the embedder constructor.\n\n        \"dropout_rate\" : float\n            The dropout rate between 0 and 1. E.g., `dropout_rate=0.1` would\n            drop out 10% of the embedding. Set to 0 to disable dropout.\n\n        \"dropout_strategy\" : str\n            The dropout strategy. Can be one of the following\n\n            - :attr:`\"element\"`: The regular strategy that drops individual \\\n            elements of embedding vectors.\n            - :attr:`\"item\"`: Drops individual items (e.g., words) entirely. \\\n            E.g., for \\\n            the word sequence 'the simpler the better', the strategy can \\\n            yield '_ simpler the better', where the first `the` is dropped.\n            - :attr:`\"item_type\"`: Drops item types (e.g., word types). \\\n            E.g., for the \\\n            above sequence, the strategy can yield '_ simpler _ better', \\\n            where the word type 'the' is dropped. The dropout will never \\\n            yield '_ simpler the better' as in the 'item' strategy.\n\n        \"trainable\" : bool\n            Whether the embedding is trainable.\n\n        \"initializer\" : dict or None\n            Hyperparameters of the initializer for embedding values. See\n            :func:`~texar.core.get_initializer` for the details. Ignored if\n            :attr:`init_value` is given to the embedder constructor.\n\n        \"regularizer\" : dict\n            Hyperparameters of the regularizer for embedding values. 
See\n            :func:`~texar.core.get_regularizer` for the details.\n\n        \"name\" : str\n            Name of the embedding variable.\n        \"\"\"\n        hparams = embedder_utils.default_embedding_hparams()\n        hparams[\"name\"] = \"word_embedder\"\n        return hparams\n\n    def _build(self, ids=None, soft_ids=None, mode=None, **kwargs):\n        \"\"\"Embeds (soft) ids.\n\n        Either :attr:`ids` or :attr:`soft_ids` must be given, and they\n        must not be given at the same time.\n\n        Args:\n            ids (optional): An integer tensor containing the ids to embed.\n            soft_ids (optional): A tensor of weights (probabilities) used to\n                mix the embedding vectors.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL`, and `PREDICT`. If `None`, dropout is\n                controlled by :func:`texar.global_mode`.\n            kwargs: Additional keyword arguments for\n                :tf_main:`tf.nn.embedding_lookup <nn/embedding_lookup>` besides\n                :attr:`params` and :attr:`ids`.\n\n        Returns:\n            If :attr:`ids` is given, returns a Tensor of shape\n            `shape(ids) + embedding-dim`. For example,\n            if `shape(ids) = [batch_size, max_time]`\n            and `shape(embedding) = [vocab_size, emb_dim]`, then the return\n            tensor has shape `[batch_size, max_time, emb_dim]`.\n\n            If :attr:`soft_ids` is given, returns a Tensor of shape\n            `shape(soft_ids)[:-1] + embdding-dim`. 
For example,\n            if `shape(soft_ids) = [batch_size, max_time, vocab_size]`\n            and `shape(embedding) = [vocab_size, emb_dim]`, then the return\n            tensor has shape `[batch_size, max_time, emb_dim]`.\n        \"\"\"\n        if ids is not None:\n            if soft_ids is not None:\n                raise ValueError(\n                    'Must not specify `ids` and `soft_ids` at the same time.')\n            ids_rank = get_rank(ids)\n        elif soft_ids is not None:\n            ids_rank = get_rank(soft_ids) - 1\n        else:\n            raise ValueError('Either `ids` or `soft_ids` must be given.')\n\n        embedding = self._embedding\n\n        is_training = is_train_mode(mode)\n        if self._hparams.dropout_strategy == 'item_type':\n            dropout_layer = self._get_dropout_layer(self._hparams)\n            if dropout_layer:\n                embedding = dropout_layer.apply(inputs=embedding,\n                                                training=is_training)\n\n        if ids is not None:\n            outputs = tf.nn.embedding_lookup(embedding, ids, **kwargs)\n        else:\n            outputs = embedder_utils.soft_embedding_lookup(embedding, soft_ids)\n\n        if self._hparams.dropout_strategy != 'item_type':\n            dropout_layer = self._get_dropout_layer(\n                self._hparams, ids_rank=ids_rank, dropout_input=outputs)\n            if dropout_layer:\n                outputs = dropout_layer.apply(\n                    inputs=outputs, training=is_training)\n\n        return outputs\n\n    @property\n    def embedding(self):\n        \"\"\"The embedding tensor, of shape `[vocab_size] + dim`.\n        \"\"\"\n        return self._embedding\n\n    @property\n    def dim(self):\n        \"\"\"The embedding dimension.\n        \"\"\"\n        return self._dim\n\n    @property\n    def vocab_size(self):\n        \"\"\"The vocabulary size.\n        \"\"\"\n        return self._vocab_size\n\n"
  },
  {
    "path": "texar_repo/texar/modules/embedders/embedders_test.py",
    "content": "#\n\"\"\"\nUnit tests for embedders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\n# pylint: disable=no-member\n\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.modules.embedders.embedders import WordEmbedder\nfrom texar.modules.embedders.position_embedders import PositionEmbedder\nfrom texar.context import global_mode\n\nclass EmbedderTest(tf.test.TestCase):\n    \"\"\"Tests parameterized embedder.\n    \"\"\"\n\n    def _test_word_embedder(self, hparams):\n        \"\"\"Tests :class:`texar.modules.WordEmbedder`.\n        \"\"\"\n        embedder = WordEmbedder(\n            vocab_size=100, hparams=hparams)\n\n        inputs = tf.ones([64, 16], dtype=tf.int32)\n        outputs = embedder(inputs)\n\n        inputs_soft = tf.ones([64, 16, embedder.vocab_size], dtype=tf.float32)\n        outputs_soft = embedder(soft_ids=inputs_soft)\n\n        emb_dim = embedder.dim\n        if not isinstance(emb_dim, (list, tuple)):\n            emb_dim = [emb_dim]\n\n        hparams_dim = hparams[\"dim\"]\n        if not isinstance(hparams[\"dim\"], (list, tuple)):\n            hparams_dim = [hparams[\"dim\"]]\n\n        self.assertEqual(outputs.shape, [64, 16] + emb_dim)\n        self.assertEqual(outputs_soft.shape, [64, 16] + emb_dim)\n        self.assertEqual(emb_dim, hparams_dim)\n        self.assertEqual(embedder.vocab_size, 100)\n        self.assertEqual(len(embedder.trainable_variables), 1)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_, outputs_soft_ = sess.run(\n                [outputs, outputs_soft],\n                feed_dict={global_mode(): tf.estimator.ModeKeys.TRAIN})\n            self.assertEqual(outputs_.shape, (64, 16) + tuple(emb_dim))\n            self.assertEqual(outputs_soft_.shape, (64, 16) + tuple(emb_dim))\n\n        # Tests unknown 
input shapes\n        inputs = tf.placeholder(dtype=tf.int64, shape=[None, None])\n        outputs = embedder(inputs)\n        self.assertEqual(len(outputs.get_shape()), 2 + len(hparams_dim))\n\n        inputs_soft = tf.placeholder(dtype=tf.int64, shape=[None, None, None])\n        outputs_soft = embedder(soft_ids=inputs_soft)\n        self.assertEqual(len(outputs_soft.get_shape()), 2 + len(hparams_dim))\n\n\n    def _test_position_embedder(self, hparams):\n        \"\"\"Tests :class:`texar.modules.PositionEmbedder`.\n        \"\"\"\n        pos_size = 100\n        embedder = PositionEmbedder(\n            position_size=pos_size, hparams=hparams)\n        inputs = tf.ones([64, 16], dtype=tf.int32)\n        outputs = embedder(inputs)\n\n        emb_dim = embedder.dim\n        if not isinstance(emb_dim, (list, tuple)):\n            emb_dim = [emb_dim]\n\n        hparams_dim = hparams[\"dim\"]\n        if not isinstance(hparams[\"dim\"], (list, tuple)):\n            hparams_dim = [hparams[\"dim\"]]\n\n        self.assertEqual(outputs.shape, [64, 16] + emb_dim)\n        self.assertEqual(emb_dim, hparams_dim)\n        self.assertEqual(embedder.position_size, 100)\n        self.assertEqual(len(embedder.trainable_variables), 1)\n\n        seq_length = tf.random_uniform([64], maxval=pos_size, dtype=tf.int32)\n        outputs = embedder(sequence_length=seq_length)\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_, max_seq_length = sess.run(\n                [outputs, tf.reduce_max(seq_length)],\n                feed_dict={global_mode(): tf.estimator.ModeKeys.TRAIN})\n            self.assertEqual(outputs_.shape,\n                             (64, max_seq_length) + tuple(emb_dim))\n\n    def test_embedder(self):\n        \"\"\"Tests various embedders.\n        \"\"\"\n        # no dropout\n        hparams = {\"dim\": 1024, \"dropout_rate\": 0}\n        self._test_word_embedder(hparams)\n        
self._test_position_embedder(hparams)\n\n        hparams = {\"dim\": [1024], \"dropout_rate\": 0}\n        self._test_word_embedder(hparams)\n        self._test_position_embedder(hparams)\n\n        hparams = {\"dim\": [1024, 10], \"dropout_rate\": 0}\n        self._test_word_embedder(hparams)\n        self._test_position_embedder(hparams)\n\n        # dropout with default strategy\n        hparams = {\"dim\": 1024, \"dropout_rate\": 0.3}\n        self._test_word_embedder(hparams)\n        self._test_position_embedder(hparams)\n\n        hparams = {\"dim\": [1024], \"dropout_rate\": 0.3}\n        self._test_word_embedder(hparams)\n        self._test_position_embedder(hparams)\n\n        hparams = {\"dim\": [1024, 10], \"dropout_rate\": 0.3}\n        self._test_word_embedder(hparams)\n        self._test_position_embedder(hparams)\n\n        # dropout with different strategies\n        hparams = {\"dim\": 1024, \"dropout_rate\": 0.3,\n                   \"dropout_strategy\": \"item\"}\n        self._test_word_embedder(hparams)\n        self._test_position_embedder(hparams)\n\n        hparams = {\"dim\": [1024], \"dropout_rate\": 0.3,\n                   \"dropout_strategy\": \"item\"}\n        self._test_word_embedder(hparams)\n        self._test_position_embedder(hparams)\n\n        hparams = {\"dim\": [1024, 10], \"dropout_rate\": 0.3,\n                   \"dropout_strategy\": \"item\"}\n        self._test_word_embedder(hparams)\n        self._test_position_embedder(hparams)\n\n        hparams = {\"dim\": 1024, \"dropout_rate\": 0.3,\n                   \"dropout_strategy\": \"item_type\"}\n        self._test_word_embedder(hparams)\n        self._test_position_embedder(hparams)\n\n        hparams = {\"dim\": [1024], \"dropout_rate\": 0.3,\n                   \"dropout_strategy\": \"item_type\"}\n        self._test_word_embedder(hparams)\n        self._test_position_embedder(hparams)\n\n        hparams = {\"dim\": [1024, 10], \"dropout_rate\": 0.3,\n                 
  \"dropout_strategy\": \"item_type\"}\n        self._test_word_embedder(hparams)\n        self._test_position_embedder(hparams)\n\n    def test_embedder_multi_calls(self):\n        \"\"\"Tests embedders called by multiple times.\n        \"\"\"\n        hparams = {\"dim\": 1024, \"dropout_rate\": 0.3,\n                   \"dropout_strategy\": \"item\"}\n        embedder = WordEmbedder(\n            vocab_size=100, hparams=hparams)\n        inputs = tf.ones([64, 16], dtype=tf.int32)\n        outputs = embedder(inputs)\n\n        emb_dim = embedder.dim\n        if not isinstance(emb_dim, (list, tuple)):\n            emb_dim = [emb_dim]\n        self.assertEqual(outputs.shape, [64, 16] + emb_dim)\n\n        # Call with inputs in a different shape\n        inputs = tf.ones([64, 10, 20], dtype=tf.int32)\n        outputs = embedder(inputs)\n\n        emb_dim = embedder.dim\n        if not isinstance(emb_dim, (list, tuple)):\n            emb_dim = [emb_dim]\n        self.assertEqual(outputs.shape, [64, 10, 20] + emb_dim)\n\n    def test_word_embedder_soft_ids(self):\n        \"\"\"Tests the correctness of using soft ids.\n        \"\"\"\n        init_value = np.expand_dims(np.arange(5), 1)\n        embedder = WordEmbedder(init_value=init_value)\n\n        ids = np.array([3])\n        soft_ids = np.array([[0, 0, 0, 1, 0]])\n\n        outputs = embedder(ids=ids)\n        soft_outputs = embedder(soft_ids=soft_ids)\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_, soft_outputs_ = sess.run([outputs, soft_outputs])\n            self.assertEqual(outputs_, soft_outputs_)\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/embedders/position_embedders.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious position embedders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport math\n\nimport tensorflow as tf\n\nfrom texar.modules.embedders.embedder_base import EmbedderBase\nfrom texar.modules.embedders import embedder_utils\nfrom texar.utils.mode import is_train_mode\nfrom texar.utils.shapes import mask_sequences\n\n# pylint: disable=arguments-differ, invalid-name\n\n__all__ = [\n    \"PositionEmbedder\",\n    \"SinusoidsPositionEmbedder\",\n]\n\nclass PositionEmbedder(EmbedderBase):\n    \"\"\"Simple position embedder that maps position indexes into embeddings\n    via lookup.\n\n    Either :attr:`init_value` or :attr:`position_size` is required. If both are\n    given, there must be `init_value.shape[0]==position_size`.\n\n    Args:\n        init_value (optional): A `Tensor` or numpy array that contains the\n            initial value of embeddings. It is typically of shape\n            `[position_size, embedding dim]`\n\n            If `None`, embedding is initialized as specified in\n            :attr:`hparams[\"initializer\"]`. 
Otherwise, the\n            :attr:`\"initializer\"` and :attr:`\"dim\"`\n            hyperparameters in :attr:`hparams` are ignored.\n        position_size (int, optional): The number of possible positions, e.g.,\n            the maximum sequence length. Required if :attr:`init_value` is\n            not given.\n        hparams (dict, optional): Embedder hyperparameters. If it is not\n            specified, the default hyperparameter setting is used. See\n            :attr:`default_hparams` for the sturcture and default values.\n\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n\n    def __init__(self, init_value=None, position_size=None, hparams=None):\n        EmbedderBase.__init__(self, hparams=hparams)\n\n        if init_value is None and position_size is None:\n            raise ValueError(\n                \"Either `init_value` or `position_size` is required.\")\n\n        self._init_parameterized_embedding(init_value, position_size,\n                                           self._hparams)\n\n        self._position_size = position_size\n        if position_size is None:\n            self._position_size = self._num_embeds\n        if self._position_size != self._num_embeds:\n            raise ValueError(\n                'position_size must equal to init_value.shape[0].'\n                'Got %d and %d' % (self._position_size, self._num_embeds))\n\n        self._built = True\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                \"dim\": 100,\n                \"initializer\": {\n                    \"type\": \"random_uniform_initializer\",\n                    \"kwargs\": {\n                        \"minval\": -0.1,\n                        \"maxval\": 0.1,\n                        \"seed\": None\n                    }\n                },\n                \"regularizer\": {\n                    \"type\": \"L1L2\",\n                    \"kwargs\": {\n                        \"l1\": 0.,\n                        \"l2\": 0.\n                    }\n                },\n                \"dropout_rate\": 0,\n                \"trainable\": True,\n                \"name\": \"position_embedder\"\n            }\n\n        The hyperparameters have the same meaning as those in\n        :meth:`texar.modules.WordEmbedder.default_hparams`.\n        \"\"\"\n        hparams = embedder_utils.default_embedding_hparams()\n        hparams[\"name\"] = \"position_embedder\"\n        return hparams\n\n    def _build(self, positions=None, sequence_length=None, mode=None, **kwargs):\n        \"\"\"Embeds the positions.\n\n        Either :attr:`position` or :attr:`sequence_length` is required:\n\n            - If both are given, :attr:`sequence_length` is used to mask out \\\n            embeddings of those time steps beyond the respective sequence \\\n            lengths.\n            - If only :attr:`sequence_length` is given, then positions \\\n            from `0` to `sequence_length-1` are embedded.\n\n        Args:\n            positions (optional): An integer tensor containing the position\n                ids to embed.\n            sequence_length (optional): An integer tensor of shape\n                `[batch_size]`. 
Time steps beyond\n                the respective sequence lengths will have zero-valued\n                embeddings.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL`, and `PREDICT`. If `None`, dropout will be\n                controlled by :func:`texar.global_mode`.\n            kwargs: Additional keyword arguments for\n                :tf_main:`tf.nn.embedding_lookup <nn/embedding_lookup>` besides\n                :attr:`params` and :attr:`ids`.\n\n        Returns:\n            A `Tensor` of shape `shape(inputs) + embedding dimension`.\n        \"\"\"\n        # Gets embedder inputs\n        inputs = positions\n        if positions is None:\n            if sequence_length is None:\n                raise ValueError(\n                    'Either `positions` or `sequence_length` is required.')\n            max_length = tf.reduce_max(sequence_length)\n            single_inputs = tf.range(start=0, limit=max_length, dtype=tf.int32)\n            # Expands `single_inputs` to have shape [batch_size, max_length]\n            expander = tf.expand_dims(tf.ones_like(sequence_length), -1)\n            inputs = expander * tf.expand_dims(single_inputs, 0)\n        ids_rank = len(inputs.shape.dims)\n\n        embedding = self._embedding\n\n        is_training = is_train_mode(mode)\n\n        # Gets dropout strategy\n        st = self._hparams.dropout_strategy\n        if positions is None and st == 'item':\n            # If `inputs` is based on `sequence_length`, then dropout\n            # strategies 'item' and 'item_type' have the same effect, we\n            # use 'item_type' to avoid unknown noise_shape in the 'item'\n            # strategy\n            st = 'item_type'\n\n        # Dropouts as 'item_type' before embedding\n        if st == 'item_type':\n            dropout_layer = self._get_dropout_layer(\n                self._hparams, 
dropout_strategy=st)\n            if dropout_layer:\n                embedding = dropout_layer.apply(inputs=embedding,\n                                                training=is_training)\n\n        # Embeds\n        outputs = tf.nn.embedding_lookup(embedding, inputs, **kwargs)\n\n        # Dropouts as 'item' or 'elements' after embedding\n        if st != 'item_type':\n            dropout_layer = self._get_dropout_layer(\n                self._hparams, ids_rank=ids_rank, dropout_input=outputs,\n                dropout_strategy=st)\n            if dropout_layer:\n                outputs = dropout_layer.apply(inputs=outputs,\n                                              training=is_training)\n\n        # Optionally masks\n        if sequence_length is not None:\n            outputs = mask_sequences(\n                outputs, sequence_length,\n                tensor_rank=len(inputs.shape.dims) + self._dim_rank)\n\n        return outputs\n\n    @property\n    def embedding(self):\n        \"\"\"The embedding tensor.\n        \"\"\"\n        return self._embedding\n\n    @property\n    def dim(self):\n        \"\"\"The embedding dimension.\n        \"\"\"\n        return self._dim\n\n    @property\n    def position_size(self):\n        \"\"\"The position size, i.e., maximum number of positions.\n        \"\"\"\n        return self._position_size\n\n\nclass SinusoidsPositionEmbedder(EmbedderBase):\n    \"\"\"Sinusoid position embedder that maps position indexes into embeddings\n    via sinusoid calculation. 
This module does not have trainable parameters.\n    Used in, e.g., :class:`~texar.modules.TransformerEncoder`.\n\n    Each channel of the input Tensor is incremented by a sinusoid of a\n    different frequency and phase.\n    This allows attention to learn to use absolute and relative positions.\n\n    Timing signals should be added to some precursors of both the query\n    and the memory inputs to attention.\n    The use of relative position is possible because sin(x+y) and\n    cos(x+y) can be experessed in terms of y, sin(x) and cos(x).\n    In particular, we use a geometric sequence of timescales starting with\n    min_timescale and ending with max_timescale.  The number of different\n    timescales is equal to dim / 2. For each timescale, we\n    generate the two sinusoidal signals sin(timestep/timescale) and\n    cos(timestep/timescale).  All of these sinusoids are concatenated in\n    the dim dimension.\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n    def __init__(self, hparams=None):\n        EmbedderBase.__init__(self, hparams=hparams)\n\n    def default_hparams(self):\n        \"\"\"Returns a dictionary of hyperparameters with default values\n        We use a geometric sequence of timescales starting with\n        min_timescale and ending with max_timescale. The number of different\n        timescales is equal to dim/2.\n\n        .. 
code-block:: python\n\n            {\n                'min_timescale': 1.0,\n                'max_timescale': 10000.0,\n                'dim': 512,\n                'name':'sinusoid_posisiton_embedder',\n            }\n        \"\"\"\n        hparams = {\n            'min_timescale': 1.0,\n            'max_timescale': 1.0e4,\n            'dim': 512,\n            'name':'sinusoid_posisiton_embedder',\n        }\n        return hparams\n\n    def _build(self, positions):\n        \"\"\"Embeds.\n\n        Args:\n            positions (optional): An integer tensor containing the position\n                ids to embed.\n        Returns:\n            A `Tensor` of shape `[1, position_size, dim]`.\n        \"\"\"\n        dim = self._hparams.dim\n        position = tf.to_float(tf.squeeze(positions, axis=0))\n        position_size = tf.shape(position)[0]\n        num_timescales = dim // 2\n        min_timescale = self._hparams.min_timescale\n        max_timescale = self._hparams.max_timescale\n        log_timescale_increment = (\n            math.log(float(max_timescale) / float(min_timescale)) /\n            (tf.to_float(num_timescales) - 1))\n        inv_timescales = min_timescale * tf.exp(\n            tf.to_float(tf.range(num_timescales)) * -log_timescale_increment)\n        scaled_time = tf.expand_dims(position, 1) \\\n            * tf.expand_dims(inv_timescales, 0)\n        signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)\n        signal = tf.pad(signal, [[0, 0], [0, tf.mod(dim, 2)]])\n        signal = tf.reshape(signal, [1, position_size, dim])\n\n        return signal\n\n"
  },
  {
    "path": "texar_repo/texar/modules/encoders/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library encoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.modules.encoders.encoder_base import *\nfrom texar.modules.encoders.rnn_encoders import *\nfrom texar.modules.encoders.hierarchical_encoders import *\nfrom texar.modules.encoders.transformer_encoders import *\nfrom texar.modules.encoders.multihead_attention import *\nfrom texar.modules.encoders.conv_encoders import *\n"
  },
  {
    "path": "texar_repo/texar/modules/encoders/conv_encoders.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious convolutional network encoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom texar.modules.encoders.encoder_base import EncoderBase\nfrom texar.modules.networks.conv_networks import Conv1DNetwork\n\n__all__ = [\n    \"Conv1DEncoder\"\n]\n\nclass Conv1DEncoder(Conv1DNetwork, EncoderBase):\n    \"\"\"Simple Conv-1D encoder which consists of a sequence of conv layers\n    followed with a sequence of dense layers.\n\n    Wraps :class:`~texar.modules.Conv1DNetwork` to be a subclass of\n    :class:`~texar.modules.EncoderBase`. Has exact the same functionality\n    with :class:`~texar.modules.Conv1DNetwork`.\n    \"\"\"\n\n    def __init__(self, hparams=None): # pylint: disable=super-init-not-called\n        Conv1DNetwork.__init__(self, hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        The same as :meth:`~texar.modules.Conv1DNetwork.default_hparams`\n        of :class:`~texar.modules.Conv1DNetwork`, except that the default name\n        is 'conv_encoder'.\n        \"\"\"\n        hparams = Conv1DNetwork.default_hparams()\n        hparams['name'] = 'conv_encoder'\n        return hparams\n\n"
  },
  {
    "path": "texar_repo/texar/modules/encoders/conv_encoders_test.py",
    "content": "#\n\"\"\"\nUnit tests for conv encoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nimport texar as tx\nfrom texar.modules.encoders.conv_encoders import Conv1DEncoder\n\n\nclass Conv1DEncoderTest(tf.test.TestCase):\n    \"\"\"Tests :class:`~texar.modules.Conv1DEncoder` class.\n    \"\"\"\n\n    def test_encode(self):\n        \"\"\"Tests encode.\n        \"\"\"\n        encoder_1 = Conv1DEncoder()\n        self.assertEqual(len(encoder_1.layers), 4)\n        self.assertTrue(isinstance(encoder_1.layer_by_name(\"conv_pool_1\"),\n                                   tx.core.MergeLayer))\n        for layer in encoder_1.layers[0].layers:\n            self.assertTrue(isinstance(layer, tx.core.SequentialLayer))\n\n        inputs_1 = tf.ones([64, 16, 300], tf.float32)\n        outputs_1 = encoder_1(inputs_1)\n        self.assertEqual(outputs_1.shape, [64, 128])\n\n        hparams = {\n            # Conv layers\n            \"num_conv_layers\": 2,\n            \"filters\": 128,\n            \"kernel_size\": [[3, 4, 5], 4],\n            \"other_conv_kwargs\": {\"padding\": \"same\"},\n            # Pooling layers\n            \"pooling\": \"AveragePooling\",\n            \"pool_size\": 2,\n            \"pool_strides\": 1,\n            # Dense layers\n            \"num_dense_layers\": 3,\n            \"dense_size\": [128, 128, 10],\n            \"dense_activation\": \"relu\",\n            \"other_dense_kwargs\": {\"use_bias\": False},\n            # Dropout\n            \"dropout_conv\": [0, 1, 2],\n            \"dropout_dense\": 2\n        }\n        encoder_2 = Conv1DEncoder(hparams)\n        # nlayers = nconv-pool + nconv + npool + ndense + ndropout + flatten\n        self.assertEqual(len(encoder_2.layers), 1+1+1+3+4+1)\n        self.assertTrue(isinstance(encoder_2.layer_by_name(\"conv_pool_1\"),\n                
                   tx.core.MergeLayer))\n        for layer in encoder_2.layers[1].layers:\n            self.assertTrue(isinstance(layer, tx.core.SequentialLayer))\n\n        inputs_2 = tf.ones([64, 16, 300], tf.float32)\n        outputs_2 = encoder_2(inputs_2)\n        self.assertEqual(outputs_2.shape, [64, 10])\n\n    def test_unknown_seq_length(self):\n        \"\"\"Tests use of pooling layer when the seq_length dimension of inputs\n        is `None`.\n        \"\"\"\n        encoder_1 = Conv1DEncoder()\n        inputs_1 = tf.placeholder(tf.float32, [64, None, 300])\n        outputs_1 = encoder_1(inputs_1)\n        self.assertEqual(outputs_1.shape, [64, 128])\n\n        hparams = {\n            # Conv layers\n            \"num_conv_layers\": 2,\n            \"filters\": 128,\n            \"kernel_size\": [[3, 4, 5], 4],\n            # Pooling layers\n            \"pooling\": \"AveragePooling\",\n            \"pool_size\": [2, None],\n            # Dense layers\n            \"num_dense_layers\": 1,\n            \"dense_size\": 10,\n        }\n        encoder = Conv1DEncoder(hparams)\n        # nlayers = nconv-pool + nconv + npool + ndense + ndropout + flatten\n        self.assertEqual(len(encoder.layers), 1+1+1+1+1+1)\n        self.assertTrue(isinstance(encoder.layer_by_name('pool_2'),\n                                   tx.core.AverageReducePooling1D))\n\n        inputs = tf.placeholder(tf.float32, [64, None, 300])\n        outputs = encoder(inputs)\n        self.assertEqual(outputs.shape, [64, 10])\n\n        hparams_2 = {\n            # Conv layers\n            \"num_conv_layers\": 1,\n            \"filters\": 128,\n            \"kernel_size\": 4,\n            \"other_conv_kwargs\": {'data_format': 'channels_first'},\n            # Pooling layers\n            \"pooling\": \"MaxPooling\",\n            \"other_pool_kwargs\": {'data_format': 'channels_first'},\n            # Dense layers\n            \"num_dense_layers\": 1,\n            \"dense_size\": 10,\n      
  }\n        encoder_2 = Conv1DEncoder(hparams_2)\n        inputs_2 = tf.placeholder(tf.float32, [64, 300, None])\n        outputs_2 = encoder_2(inputs_2)\n        self.assertEqual(outputs_2.shape, [64, 10])\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/encoders/encoder_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase class for encoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom texar.module_base import ModuleBase\n\n__all__ = [\n    \"EncoderBase\"\n]\n\nclass EncoderBase(ModuleBase):\n    \"\"\"Base class inherited by all encoder classes.\n    \"\"\"\n\n    def __init__(self, hparams=None):\n        ModuleBase.__init__(self, hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n        \"\"\"\n        return {\n            \"name\": \"encoder\"\n        }\n\n    def _build(self, inputs, *args, **kwargs):\n        \"\"\"Encodes the inputs.\n\n        Args:\n          inputs: Inputs to the encoder.\n          *args: Other arguments.\n          **kwargs: Keyword arguments.\n\n        Returns:\n          Encoding results.\n        \"\"\"\n        raise NotImplementedError\n\n"
  },
  {
    "path": "texar_repo/texar/modules/encoders/hierarchical_encoders.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious encoders that encode data with hierarchical structure.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport collections\n\nimport tensorflow as tf\nfrom tensorflow.contrib.rnn import LSTMStateTuple\nfrom tensorflow.python.util import nest    # pylint: disable=E0611\n\nfrom texar.modules.encoders.encoder_base import EncoderBase\nfrom texar.utils import utils\n\n# pylint: disable=invalid-name, too-many-arguments, too-many-locals\n\n__all__ = [\n    \"HierarchicalRNNEncoder\"\n]\n\nclass HierarchicalRNNEncoder(EncoderBase):\n    \"\"\"A hierarchical encoder that stacks basic RNN encoders into two layers.\n    Can be used to encode long, structured sequences, e.g. paragraphs, dialog\n    history, etc.\n\n    Args:\n        encoder_major (optional): An instance of subclass of\n            :class:`~texar.modules.RNNEncoderBase`\n            The high-level encoder taking final\n            states from low-level encoder as its\n            inputs. If not specified, an encoder\n            is created as specified in\n            :attr:`hparams[\"encoder_major\"]`.\n        encoder_minor (optional): An instance of subclass of\n            :class:`~texar.modules.RNNEncoderBase`\n            The low-level encoder. 
If not\n            specified, an encoder is created as specified\n            in :attr:`hparams[\"encoder_minor\"]`.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    See :meth:`_build` for the inputs and outputs of the encoder.\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n\n    def __init__(self, encoder_major=None, encoder_minor=None,\n                 hparams=None):\n        EncoderBase.__init__(self, hparams)\n\n        encoder_major_hparams = utils.get_instance_kwargs(\n            None, self._hparams.encoder_major_hparams)\n        encoder_minor_hparams = utils.get_instance_kwargs(\n            None, self._hparams.encoder_minor_hparams)\n\n        if encoder_major is not None:\n            self._encoder_major = encoder_major\n        else:\n            with tf.variable_scope(self.variable_scope.name):\n                with tf.variable_scope('encoder_major'):\n                    self._encoder_major = utils.check_or_get_instance(\n                        self._hparams.encoder_major_type,\n                        encoder_major_hparams,\n                        ['texar.modules.encoders', 'texar.custom'])\n\n        if encoder_minor is not None:\n            self._encoder_minor = encoder_minor\n        elif self._hparams.config_share:\n            with tf.variable_scope(self.variable_scope.name):\n                with tf.variable_scope('encoder_minor'):\n                    self._encoder_minor = utils.check_or_get_instance(\n                        self._hparams.encoder_major_type,\n                        encoder_major_hparams,\n                        ['texar.modules.encoders', 'texar.custom'])\n        else:\n            with tf.variable_scope(self.variable_scope.name):\n                with tf.variable_scope('encoder_minor'):\n        
            self._encoder_minor = utils.check_or_get_instance(\n                        self._hparams.encoder_minor_type,\n                        encoder_minor_hparams,\n                        ['texar.modules.encoders', 'texar.custom'])\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. role:: python(code)\n           :language: python\n\n        .. code-block:: python\n\n            {\n                \"encoder_major_type\": \"UnidirectionalRNNEncoder\",\n                \"encoder_major_hparams\": {},\n                \"encoder_minor_type\": \"UnidirectionalRNNEncoder\",\n                \"encoder_minor_hparams\": {},\n                \"config_share\": False,\n                \"name\": \"hierarchical_encoder_wrapper\"\n            }\n\n        Here:\n\n        \"encoder_major_type\" : str or class or instance\n            The high-level encoder. Can be a RNN encoder class, its name or\n            module path, or a class instance.\n            Ignored if `encoder_major` is given to the encoder constructor.\n\n        \"encoder_major_hparams\" : dict\n            The hyperparameters for the high-level encoder. The high-level\n            encoder is created with\n            :python:`encoder_class(hparams=encoder_major_hparams)`.\n            Ignored if `encoder_major` is given to the encoder constructor,\n            or if \"encoder_major_type\" is an encoder instance.\n\n        \"encoder_minor_type\" : str or class or instance\n            The low-level encoder. Can be a RNN encoder class, its name or\n            module path, or a class instance.\n            Ignored if `encoder_minor` is given to the encoder constructor,\n            or if \"config_share\" is True.\n\n        \"encoder_minor_hparams\" : dict\n            The hyperparameters for the low-level encoder. 
The high-level\n            encoder is created with\n            :python:`encoder_class(hparams=encoder_minor_hparams)`.\n            Ignored if `encoder_minor` is given to the encoder constructor,\n            or if \"config_share\" is True,\n            or if \"encoder_minor_type\" is an encoder instance.\n\n        \"config_share\":\n            Whether to use encoder_major's hyperparameters\n            to construct encoder_minor.\n\n        \"name\":\n            Name of the encoder.\n        \"\"\"\n        hparams = {\n            \"name\": \"hierarchical_encoder\",\n            \"encoder_major_type\": \"UnidirectionalRNNEncoder\",\n            \"encoder_major_hparams\": {},\n            \"encoder_minor_type\": \"UnidirectionalRNNEncoder\",\n            \"encoder_minor_hparams\": {},\n            \"config_share\": False,\n            \"@no_typecheck\": [\n                'encoder_major_hparams',\n                'encoder_minor_hparams'\n            ]\n        }\n        hparams.update(EncoderBase.default_hparams())\n        return hparams\n\n    def _build(self,\n               inputs,\n               order='btu',\n               medium=None,\n               sequence_length_major=None,\n               sequence_length_minor=None,\n               **kwargs):\n        \"\"\"Encodes the inputs.\n\n        Args:\n            inputs: A 4-D tensor of shape `[B, T, U, dim]`, where\n\n                - B: batch_size\n                - T: the max length of high-level sequences. E.g., the max \\\n                number of utterances in dialog history.\n                - U: the max length of low-level sequences. 
E.g., the max \\\n                length of each utterance in dialog history.\n                - dim: embedding dimension\n\n                The order of first three dimensions can be changed\n                according to :attr:`order`.\n\n            order: A 3-char string containing 'b', 't', and 'u',\n                that specifies the order of inputs dimensions above.\n                Following four can be accepted:\n\n                    - **'btu'**: None of the encoders are time-major.\n                    - **'utb'**: Both encoders are time-major.\n                    - **'tbu'**: The major encoder is time-major.\n                    - **'ubt'**: The minor encoder is time-major.\n\n            medium (optional): A list of callables that subsequently process the\n                final states of minor encoder and obtain the inputs\n                for the major encoder.\n                If not specified, :meth:`flatten` is used for processing\n                the minor's final states.\n            sequence_length_major (optional): The `sequence_length` argument\n                sent to major encoder. This is a 1-D Tensor of shape\n                `[B]`.\n            sequence_length_minor (optional): The `sequence_length` argument\n                sent to minor encoder. It can be either a 1-D Tensor of shape\n                `[B*T]`, or a 2-D Tensor of shape `[B, T]` or `[T, B]`\n                according to :attr:`order`.\n            **kwargs: Other keyword arguments for the major and minor encoders,\n                such as `initial_state`, etc.\n                Note that `sequence_length`, and `time_major`\n                must not be included here.\n                `time_major` is derived from :attr:`order` automatically.\n                By default, arguments will be sent to both major and minor\n                encoders. 
To specify which encoder an argument should be sent\n                to, add '_minor'/'_major' as its suffix.\n\n                Note that `initial_state_minor` must have a batch dimension\n                of size `B*T`. If you have an initial state of batch dimension\n                = `T`, use :meth:`tile_initial_state_minor` to tile it\n                according to `order`.\n\n        Returns:\n            A tuple `(outputs, final_state)` by the major encoder.\n\n            See\n            the return values of `_build()` method of respective encoder class\n            for details.\n        \"\"\"\n\n        def _kwargs_split(kwargs):\n            kwargs_minor, kwargs_major = {}, {}\n            for k, v in kwargs.items():\n                if len(k) >= 6 and k[-6:] == '_minor':\n                    kwargs_minor[k[:-6]] = v\n                if len(k) >= 6 and k[-6:] == '_major':\n                    kwargs_major[k[:-6]] = v\n            return kwargs_minor, kwargs_major\n\n        kwargs_minor, kwargs_major = _kwargs_split(kwargs)\n        if sequence_length_minor is not None:\n            sequence_length_minor = tf.reshape(sequence_length_minor, [-1])\n        kwargs_minor['sequence_length'] = sequence_length_minor\n        kwargs_major['sequence_length'] = sequence_length_major\n\n        expand, shape = self._get_flatten_order(\n            order, kwargs_minor, kwargs_major, tf.shape(inputs))\n\n        inputs = tf.reshape(inputs, shape + [inputs.shape[3]])\n\n        _, states_minor = self._encoder_minor(inputs, **kwargs_minor)\n\n        self.states_minor_before_medium = states_minor\n\n        if medium is None:\n            states_minor = self.flatten(states_minor)\n        else:\n            if not isinstance(medium, collections.Sequence):\n                medium = [medium]\n            for fn in medium:\n                if isinstance(fn, str) and fn == 'flatten':\n                    states_minor = self.flatten(states_minor)\n                else:\n
                    states_minor = fn(states_minor)\n\n        self.states_minor_after_medium = states_minor\n\n        states_minor = tf.reshape(\n            states_minor, tf.concat([expand, tf.shape(states_minor)[1:]], 0))\n\n        outputs_major, states_major = self._encoder_major(states_minor,\n                                                          **kwargs_major)\n\n        # Add trainable variables of `self._cell` which may be constructed\n        # externally\n        if not self._built:\n            self._add_trainable_variable(\n                self._encoder_minor.trainable_variables)\n            self._add_trainable_variable(\n                self._encoder_major.trainable_variables)\n            self._built = True\n\n        return outputs_major, states_major\n\n    @staticmethod\n    def tile_initial_state_minor(initial_state, order, inputs_shape):\n        \"\"\"Tiles an initial state to be used for encoder minor.\n\n        The batch dimension of :attr:`initial_state` must equal `T`. The\n        state will be copied for `B` times and used to start encoding each\n        low-level sequence. For example, the first utterance in each dialog\n        history in the batch will have the same initial state.\n\n        Args:\n            initial_state: Initial state with the batch dimension of size `T`.\n            order (str): The dimension order of inputs. Must be the same as\n                used in :meth:`_build`.\n            inputs_shape: Shape of `inputs` for :meth:`_build`. 
Can usually\n                be Obtained with `tf.shape(inputs)`.\n\n        Returns:\n            A tiled initial state with batch dimension of size `B*T`\n        \"\"\"\n        def _nest_tile(t, multiplier):\n            return nest.map_structure(lambda x: tf.tile(x, multiplier), t)\n\n        if order == 'btu':\n            return _nest_tile(initial_state, inputs_shape[0])\n        elif order == 'ubt':\n            return _nest_tile(initial_state, inputs_shape[1])\n        elif order == 'utb':\n            return tf.contrib.seq2seq.tile_batch(initial_state, inputs_shape[2])\n        elif order == 'tbu':\n            return tf.contrib.seq2seq.tile_batch(initial_state, inputs_shape[1])\n        else:\n            raise ValueError('Unknown order: {}'.format(order))\n\n    @staticmethod\n    def _get_flatten_order(order, kwargs_minor, kwargs_major, shape):\n        if order == 'btu':\n            kwargs_minor.setdefault('time_major', False)\n            kwargs_major.setdefault('time_major', False)\n            expand = shape[0:2]\n            shape = [shape[0] * shape[1], shape[2]]\n        elif order == 'utb':\n            kwargs_minor.setdefault('time_major', True)\n            kwargs_major.setdefault('time_major', True)\n            expand = shape[1:3]\n            shape = [shape[0], shape[1] * shape[2]]\n        elif order == 'tbu':\n            kwargs_minor.setdefault('time_major', False)\n            kwargs_major.setdefault('time_major', True)\n            expand = shape[0:2]\n            shape = [shape[0] * shape[1], shape[2]]\n        elif order == 'ubt':\n            kwargs_minor.setdefault('time_major', True)\n            kwargs_major.setdefault('time_major', False)\n            expand = shape[1:3]\n            shape = [shape[0], shape[1] * shape[2]]\n        else:\n            raise ValueError('Unknown order: {}'.format(order))\n\n        return expand, shape\n\n    @staticmethod\n    def flatten(x):\n        \"\"\"Flattens a cell state by concatenating 
a sequence of cell\n        states along the last dimension. If the cell states are\n        :tf_main:`LSTMStateTuple <contrib/rnn/LSTMStateTuple>`, only the\n        hidden `LSTMStateTuple.h` is used.\n\n        This process is used by default if :attr:`medium` is not provided\n        to :meth:`_build`.\n        \"\"\"\n        if isinstance(x, LSTMStateTuple):\n            return x.h\n        if isinstance(x, collections.Sequence):\n            return tf.concat(\n                [HierarchicalRNNEncoder.flatten(v) for v in x], -1)\n        else:\n            return x\n\n    @property\n    def encoder_major(self):\n        \"\"\"The high-level encoder.\n        \"\"\"\n        return self._encoder_major\n\n    @property\n    def encoder_minor(self):\n        \"\"\"The low-level encoder.\n        \"\"\"\n        return self._encoder_minor\n"
  },
  {
    "path": "texar_repo/texar/modules/encoders/hierarchical_encoders_test.py",
    "content": "#\n\"\"\"\nUnit tests for RNN encoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nfrom texar.modules.encoders.hierarchical_encoders import HierarchicalRNNEncoder\n\n# pylint: disable=too-many-locals\n\nclass HierarchicalRNNEncoderTest(tf.test.TestCase):\n    \"\"\"Tests HierarchicalRNNEncoder\n    \"\"\"\n\n    def test_trainable_variables(self):\n        encoder = HierarchicalRNNEncoder()\n\n        inputs = tf.random_uniform(\n            [3, 2, 3, 4],\n            maxval=1,\n            minval=-1,\n            dtype=tf.float32)\n        _, _ = encoder(inputs)\n\n        self.assertEqual(\n            len(encoder.trainable_variables),\n            len(encoder.encoder_major.trainable_variables) + \\\n            len(encoder.encoder_minor.trainable_variables))\n\n    def test_encode(self):\n        encoder = HierarchicalRNNEncoder()\n\n        batch_size = 16\n        max_major_time = 8\n        max_minor_time = 6\n        dim = 10\n        inputs = tf.random_uniform(\n            [batch_size, max_major_time, max_minor_time, dim],\n            maxval=1,\n            minval=-1,\n            dtype=tf.float32)\n        outputs, state = encoder(inputs)\n\n        cell_dim = encoder.encoder_major.hparams.rnn_cell.kwargs.num_units\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_, state_ = sess.run([outputs, state])\n            self.assertEqual(state_[0].shape, (batch_size, cell_dim))\n\n    def test_order(self):\n        encoder = HierarchicalRNNEncoder()\n\n        batch_size = 16\n        max_major_time = 8\n        max_minor_time = 6\n        dim = 10\n        inputs = tf.random_uniform(\n            [batch_size, max_major_time, max_minor_time, dim],\n            maxval=1,\n            minval=-1,\n            
dtype=tf.float32)\n\n        outputs_1, state_1 = encoder(inputs, order='btu')\n        outputs_2, state_2 = encoder(inputs, order='utb')\n        outputs_3, state_3 = encoder(inputs, order='tbu')\n        outputs_4, state_4 = encoder(inputs, order='ubt')\n\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            sess.run([outputs_1, state_1, outputs_2, state_2,\n                      outputs_3, state_3, outputs_4, state_4])\n\n    def test_depack(self):\n        hparams = {\n            \"encoder_major_type\": \"BidirectionalRNNEncoder\",\n            \"encoder_major_hparams\": {\n                \"rnn_cell_fw\": {\n                    \"type\": \"LSTMCell\",\n                    \"kwargs\": {\n                        \"num_units\": 100\n                    }\n                }\n            }\n        }\n        encoder = HierarchicalRNNEncoder(hparams=hparams)\n\n        batch_size = 16\n        max_major_time = 8\n        max_minor_time = 6\n        dim = 10\n        inputs = tf.random_uniform(\n            [batch_size, max_major_time, max_minor_time, dim],\n            maxval=1,\n            minval=-1,\n            dtype=tf.float32)\n\n        _, _ = encoder(inputs)\n\n        self.assertEqual(\n            encoder.states_minor_before_medium.h.shape[1],\n            encoder.states_minor_after_medium.shape[1])\n\n    def test_encoder_minor_as_birnn(self):\n        \"\"\"Tests encoder_minor as a BidirectionalRNNEncoder\n        \"\"\"\n        hparams = {\n            \"encoder_minor_type\": \"BidirectionalRNNEncoder\",\n            \"encoder_minor_hparams\": {\n                \"rnn_cell_fw\": {\n                    \"type\": \"LSTMCell\",\n                    \"kwargs\": {\n                        \"num_units\": 100\n                    }\n                }\n            },\n            \"encoder_major_hparams\": {\n                \"rnn_cell\": {\n                    \"type\": \"LSTMCell\",\n               
     \"kwargs\": {\n                        \"num_units\": 200\n                    }\n                }\n            }\n        }\n        encoder = HierarchicalRNNEncoder(hparams=hparams)\n\n        batch_size = 16\n        max_major_time = 8\n        max_minor_time = 6\n        dim = 10\n        inputs = tf.random_uniform(\n            [batch_size, max_major_time, max_minor_time, dim],\n            maxval=1,\n            minval=-1,\n            dtype=tf.float32)\n\n        outputs, _ = encoder(inputs)\n        self.assertEqual(list(outputs.shape), [16, 8, 200])\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/encoders/multihead_attention.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nTransformer encoders with multihead self attention.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.core import layers\nfrom texar.modules.encoders.encoder_base import EncoderBase\nfrom texar.utils.shapes import shape_list\nfrom texar.utils.mode import is_train_mode\n\n# pylint: disable=too-many-locals, invalid-name, arguments-differ\n# pylint: disable=too-many-arguments\n\n__all__ = [\n    \"MultiheadAttentionEncoder\"\n]\n\nclass MultiheadAttentionEncoder(EncoderBase):\n    \"\"\"Multihead Attention Encoder\n\n    Args:\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    .. document private functions\n    .. 
automethod:: _build\n    \"\"\"\n    def __init__(self, hparams=None):\n        EncoderBase.__init__(self, hparams)\n        use_bias = self._hparams.use_bias\n\n        with tf.variable_scope(self.variable_scope):\n            if self._hparams.initializer:\n                tf.get_variable_scope().set_initializer(\n                    layers.get_initializer(self._hparams.initializer))\n\n            self.Q_dense = tf.layers.Dense(self._hparams.num_units,\n                                           use_bias=use_bias,\n                                           name='query')\n            self.K_dense = tf.layers.Dense(self._hparams.num_units,\n                                           use_bias=use_bias,\n                                           name='key')\n            self.V_dense = tf.layers.Dense(self._hparams.num_units,\n                                           use_bias=use_bias,\n                                           name='value')\n            self.O_dense = tf.layers.Dense(self._hparams.output_dim,\n                                           use_bias=use_bias,\n                                           name='output')\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                \"initializer\": None,\n                'num_heads': 8,\n                'output_dim': 512,\n                'num_units': 512,\n                'dropout_rate': 0.1,\n                'use_bias': False,\n                \"name\": \"multihead_attention\"\n            }\n\n        Here:\n\n        \"initializer\" : dict, optional\n            Hyperparameters of the default initializer that initializes\n            variables created in this module.\n            See :func:`~texar.core.get_initializer` for details.\n\n        \"num_heads\" : int\n            Number of heads for attention calculation.\n\n        \"output_dim\" : int\n            Output dimension of the returned tensor.\n\n        \"num_units\" : int\n            Hidden dimension of the unsplitted attention space.\n            Should be devisible by `num_heads`.\n\n        \"dropout_rate: : float\n            Dropout rate in the attention.\n\n        \"use_bias\": bool\n            Use bias when projecting the key, value and query.\n\n        \"name\" : str\n            Name of the module.\n        \"\"\"\n        return {\n            'initializer': None,\n            'num_heads': 8,\n            'output_dim': 512,\n            'num_units': 512,\n            'dropout_rate': 0.1,\n            'use_bias': False,\n            \"name\": \"multihead_attention\",\n        }\n\n    def _build(self, queries, memory, memory_attention_bias,\n               cache=None, mode=None):\n        \"\"\"Encodes the inputs.\n\n        Args:\n            queries: A 3d tensor with shape of [batch, length_query,\n                depth_query].\n            memory: A 3d tensor with shape of [batch, length_key, depth_key].\n            memory_attention_bias: A 3d tensor with shape of\n                [batch, length_key, num_units].\n            cache: Memory cache only when inferencing the sentence from sractch.\n            mode (optional): A tensor taking value in\n            
    :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL` and `PREDICT`. Controls dropout mode.\n                If `None` (default), :func:`texar.global_mode` is used.\n\n        Returns:\n            A Tensor of shape `[batch_size, max_time, dim]` containing the\n            encoded vectors.\n        \"\"\"\n\n        with tf.variable_scope(self.variable_scope):\n            num_heads = self._hparams.num_heads\n            num_units = self._hparams.num_units\n            if num_units % num_heads:\n                raise ValueError(\"Value depth (%d) must be divisible by \"\n                                 \"the number of attention heads (%d).\" %(\\\n                                 num_units, num_heads))\n            if memory is None:\n                # Self Attention\n                Q = self.Q_dense(queries)\n                K = self.K_dense(queries)\n                V = self.V_dense(queries)\n\n                if cache is not None:\n                    # 'decoder self attention when dynamic decoding'\n                    K = tf.concat([cache['self_keys'], K], axis=1)\n                    V = tf.concat([cache['self_values'], V], axis=1)\n                    cache['self_keys'] = K\n                    cache['self_values'] = V\n            else:\n                # encoder decoder attention\n                Q = self.Q_dense(queries)\n                if cache is not None:\n                    K, V = tf.cond(\n                        tf.equal(tf.shape(cache[\"memory_keys\"])[1], 0),\n                        true_fn=lambda: \\\n                            [self.K_dense(memory), self.V_dense(memory)],\n                        false_fn=lambda: \\\n                            [cache[\"memory_keys\"], cache[\"memory_values\"]])\n                else:\n                    K, V = [self.K_dense(memory), self.V_dense(memory)]\n\n            Q_ = self._split_heads(Q)\n            K_ = self._split_heads(K)\n            V_ = 
self._split_heads(V)\n            #[batch_size, num_heads, seq_length, memory_depth]\n            key_depth_per_head = num_units // num_heads\n            Q_ *= key_depth_per_head**-0.5\n\n            logits = tf.matmul(Q_, K_, transpose_b=True)\n            if memory_attention_bias is not None:\n                logits += memory_attention_bias\n            weights = tf.nn.softmax(logits, name=\"attention_weights\")\n            weights = tf.layers.dropout(weights,\n                                        rate=self._hparams.dropout_rate,\n                                        training=is_train_mode(mode))\n            outputs = tf.matmul(weights, V_)\n\n            outputs = self._combine_heads(outputs)\n            outputs = self.O_dense(outputs)\n            #(batch_size, length_query, output_dim)\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            self._built = True\n\n        return outputs\n\n    def _split_heads(self, x):\n        \"\"\"Split channels (dimension 2) into multiple heads,\n        becomes dimension 1).\n\n        Must ensure `x.shape[-1]` can be deviced by num_heads\n        \"\"\"\n        depth = shape_list(x)[-1]\n        splitted_x = tf.reshape(x, [tf.shape(x)[0], tf.shape(x)[1], \\\n            self._hparams.num_heads, depth // self._hparams.num_heads])\n        return tf.transpose(splitted_x, [0, 2, 1, 3])\n\n    def _combine_heads(self, x):\n        \"\"\"\n        Args:\n            x: A Tensor of shape `[batch, num_heads, seq_len, dim]`\n\n        Returns:\n            A Tensor of shape `[batch, seq_len, num_heads * dim]`\n        \"\"\"\n        t = tf.transpose(x, [0, 2, 1, 3]) #[batch, seq_len, num_heads, dim]\n        num_heads, dim = shape_list(t)[-2:]\n        assert num_heads == self._hparams.num_heads\n        return tf.reshape(t, [tf.shape(t)[0], tf.shape(t)[1], num_heads*dim])\n"
  },
  {
    "path": "texar_repo/texar/modules/encoders/rnn_encoders.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious RNN encoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport functools\nimport numpy as np\n\nimport tensorflow as tf\nfrom tensorflow.contrib.framework import nest\n\nfrom texar.modules.encoders.encoder_base import EncoderBase\nfrom texar.modules.networks.conv_networks import _to_list\nfrom texar.core import layers\nfrom texar.utils.mode import is_train_mode\nfrom texar.utils.shapes import mask_sequences\nfrom texar.hyperparams import HParams\n\n# pylint: disable=too-many-arguments, too-many-locals, invalid-name, no-member\n\n__all__ = [\n    \"_forward_single_output_layer\",\n    \"RNNEncoderBase\",\n    \"UnidirectionalRNNEncoder\",\n    \"BidirectionalRNNEncoder\"\n]\n\ndef _default_output_layer_hparams():\n    return {\n        \"num_layers\": 0,\n        \"layer_size\": 128,\n        \"activation\": \"identity\",\n        \"final_layer_activation\": None,\n        \"other_dense_kwargs\": None,\n        \"dropout_layer_ids\": [],\n        \"dropout_rate\": 0.5,\n        \"variational_dropout\": False,\n        \"@no_typecheck\": [\"activation\", \"final_layer_activation\",\n                          \"layer_size\", \"dropout_layer_ids\"]\n    }\n\ndef _build_dense_output_layer(hparams):\n    nlayers = hparams.num_layers\n\n    if 
nlayers <= 0:\n        return None\n\n    layer_size = _to_list(\n        hparams.layer_size, 'output_layer.layer_size', nlayers)\n\n    other_kwargs = hparams.other_dense_kwargs or {}\n    if isinstance(other_kwargs, HParams):\n        other_kwargs = other_kwargs.todict()\n    if not isinstance(other_kwargs, dict):\n        raise ValueError(\n            \"hparams 'output_layer.other_dense_kwargs' must be a dict.\")\n\n    dense_layers = []\n    for i in range(nlayers):\n        if i == nlayers - 1:\n            activation = hparams.final_layer_activation\n        else:\n            activation = hparams.activation\n\n        kwargs_i = {\"units\": layer_size[i],\n                    \"activation\": activation,\n                    \"name\": \"dense_%d\" % (i+1)}\n        kwargs_i.update(other_kwargs)\n\n        layer_hparams = {\"type\": \"Dense\", \"kwargs\": kwargs_i}\n        dense_layers.append(layers.get_layer(hparams=layer_hparams))\n\n    if len(dense_layers) == 1:\n        dense_layers = dense_layers[0]\n\n    return dense_layers\n\ndef _forward_single_output_layer(inputs, input_size, output_layer):\n    \"\"\"Forwards the input through a single output layer.\n\n    Args:\n        inputs: A Tensor of shape `[batch_size, max_time] + input_size` if\n            :attr:`time_major=False`, or shape\n            `[max_time, batch_size] + input_size` if :attr:`time_major=True`.\n        input_size: An `int` or 1D `int` array.\n    \"\"\"\n    dim = np.prod(input_size)\n    inputs_flat = inputs\n    inputs_flat = tf.reshape(inputs_flat, [-1, dim])\n    # Feed to the layer\n    output_flat = output_layer(inputs_flat)\n    output_size = output_layer.compute_output_shape([1, dim]).as_list()[1:]\n    output_size = np.array(output_size)\n    # Reshape output to [batch_size/max_time, max_time/batch_size] + output_size\n    output_shape = tf.concat([tf.shape(inputs)[:2], output_size], axis=0)\n    output = tf.reshape(output_flat, output_shape)\n    return output, 
output_size\n\ndef _apply_dropout(inputs, time_major, hparams, training):\n    \"\"\"Applies dropout to the inputs.\n\n    :attr:`inputs` is a Tensor of shape `[batch_size, max_time, dim]`\n    if :attr:`time_major=False`, or shape `[max_time, batch_size, dim]`\n    if :attr:`time_major=True`.\n    \"\"\"\n    noise_shape = None\n    if hparams.variational_dropout:\n        if time_major:\n            noise_shape = [1, None, None]\n        else:\n            noise_shape = [None, 1, None]\n    return tf.layers.dropout(inputs, rate=hparams.dropout_rate,\n                             noise_shape=noise_shape, training=training)\n\ndef _forward_output_layers(inputs, input_size, output_layer, time_major,\n                           hparams, mode, sequence_length=None):\n    \"\"\"Forwards inputs through the output layers.\n\n    Args:\n        inputs: A Tensor of shape `[batch_size, max_time] + input_size` if\n            :attr:`time_major=False`, or shape\n            `[max_time, batch_size] + input_size` if :attr:`time_major=True`.\n\n    Returns:\n        A pair :attr:`(outputs, outputs_size), where\n\n        - :attr:`outputs`: A Tensor of shape \\\n          `[batch_size, max_time] + outputs_size`.\n\n        - :attr:`outputs_size`: An `int` or 1D `int` array representing the \\\n          output size.\n    \"\"\"\n    if output_layer is None:\n        return inputs, input_size\n\n    if hparams is None:\n        # output_layer was passed in from the constructor\n        if isinstance(output_layer, (list, tuple)):\n            raise ValueError('output_layer must not be a list or tuple.')\n        output, output_size = _forward_single_output_layer(\n            inputs, input_size, output_layer)\n    else:\n        # output_layer was built based on hparams\n        output_layer = _to_list(output_layer)\n\n        dropout_layer_ids = _to_list(hparams.dropout_layer_ids)\n        if len(dropout_layer_ids) > 0:\n            training = is_train_mode(mode)\n\n        output 
= inputs\n        output_size = input_size\n        for i, layer in enumerate(output_layer):\n            if i in dropout_layer_ids:\n                output = _apply_dropout(output, time_major, hparams, training)\n            output, output_size = _forward_single_output_layer(\n                output, output_size, layer)\n\n        if len(output_layer) in dropout_layer_ids:\n            output = _apply_dropout(output, time_major, hparams, training)\n\n    if sequence_length is not None:\n        output = mask_sequences(\n            output, sequence_length, time_major=time_major, tensor_rank=3)\n\n    return output, output_size\n\ndef _apply_rnn_encoder_output_layer(output_layer, time_major, hparams, mode,\n                                    cell_outputs, cell_output_size):\n    map_func = functools.partial(\n        _forward_output_layers,\n        output_layer=output_layer,\n        time_major=time_major,\n        hparams=hparams,\n        mode=mode)\n    cell_outputs_flat = nest.flatten(cell_outputs)\n    cell_output_size_flat = nest.flatten(cell_output_size)\n    o = [map_func(inputs=x, input_size=xs)\n         for x, xs in zip(cell_outputs_flat, cell_output_size_flat)]\n    outputs_flat, output_size_flat = zip(*o)\n    outputs = nest.pack_sequence_as(cell_outputs, outputs_flat)\n    output_size = nest.pack_sequence_as(cell_outputs, output_size_flat)\n    return outputs, output_size\n\n\nclass RNNEncoderBase(EncoderBase):\n    \"\"\"Base class for all RNN encoder classes to inherit.\n\n    Args:\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. 
See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n    \"\"\"\n\n    def __init__(self, hparams=None):\n        EncoderBase.__init__(self, hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"name\": \"rnn_encoder\"\n            }\n        \"\"\"\n        return {\n            \"name\": \"rnn_encoder\"\n        }\n\n    def _build(self, inputs, *args, **kwargs):\n        \"\"\"Encodes the inputs.\n\n        Args:\n            inputs: Inputs to the encoder.\n            *args: Other arguments.\n            **kwargs: Keyword arguments.\n\n        Returns:\n            Encoding results.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass UnidirectionalRNNEncoder(RNNEncoderBase):\n    \"\"\"One directional RNN encoder.\n\n    Args:\n        cell: (RNNCell, optional) If not specified,\n            a cell is created as specified in :attr:`hparams[\"rnn_cell\"]`.\n        cell_dropout_mode (optional): A Tensor taking value of\n            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, which\n            toggles dropout in the RNN cell (e.g., activates dropout in\n            TRAIN mode). If `None`, :func:`~texar.global_mode` is used.\n            Ignored if :attr:`cell` is given.\n        output_layer (optional): An instance of\n            :tf_main:`tf.layers.Layer <layers/Layer>`. Applies to the RNN cell\n            output of each step. If `None` (default), the output layer is\n            created as specified in :attr:`hparams[\"output_layer\"]`.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. 
See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    See :meth:`_build` for the inputs and outputs of the encoder.\n\n    Example:\n\n        .. code-block:: python\n\n            # Use with embedder\n            embedder = WordEmbedder(vocab_size, hparams=emb_hparams)\n            encoder = UnidirectionalRNNEncoder(hparams=enc_hparams)\n\n            outputs, final_state = encoder(\n                inputs=embedder(data_batch['text_ids']),\n                sequence_length=data_batch['length'])\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n\n    def __init__(self,\n                 cell=None,\n                 cell_dropout_mode=None,\n                 output_layer=None,\n                 hparams=None):\n        RNNEncoderBase.__init__(self, hparams)\n\n        # Make RNN cell\n        with tf.variable_scope(self.variable_scope):\n            if cell is not None:\n                self._cell = cell\n            else:\n                self._cell = layers.get_rnn_cell(\n                    self._hparams.rnn_cell, cell_dropout_mode)\n\n        # Make output layer\n        with tf.variable_scope(self.variable_scope):\n            if output_layer is not None:\n                self._output_layer = output_layer\n                self._output_layer_hparams = None\n            else:\n                self._output_layer = _build_dense_output_layer(\n                    self._hparams.output_layer)\n                self._output_layer_hparams = self._hparams.output_layer\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                \"rnn_cell\": default_rnn_cell_hparams(),\n                \"output_layer\": {\n                    \"num_layers\": 0,\n                    \"layer_size\": 128,\n                    \"activation\": \"identity\",\n                    \"final_layer_activation\": None,\n                    \"other_dense_kwargs\": None,\n                    \"dropout_layer_ids\": [],\n                    \"dropout_rate\": 0.5,\n                    \"variational_dropout\": False\n                },\n                \"name\": \"unidirectional_rnn_encoder\"\n            }\n\n        Here:\n\n        \"rnn_cell\" : dict\n            A dictionary of RNN cell hyperparameters. Ignored if\n            :attr:`cell` is given to the encoder constructor.\n\n            The default value is defined in\n            :func:`~texar.core.default_rnn_cell_hparams`.\n\n        \"output_layer\" : dict\n            Output layer hyperparameters. Ignored if :attr:`output_layer`\n            is given to the encoder constructor. Includes:\n\n            \"num_layers\" : int\n                The number of output (dense) layers. Set to 0 to avoid any\n                output layers applied to the cell outputs..\n\n            \"layer_size\" : int or list\n                The size of each of the output (dense) layers.\n\n                If an `int`, each output layer will have the same size. If\n                a list, the length must equal to :attr:`num_layers`.\n\n            \"activation\" : str or callable or None\n                Activation function for each of the output (dense)\n                layer except for the final layer. This can be\n                a function, or its string name or module path.\n                If function name is given, the function must be from\n                module :tf_main:`tf.nn <nn>` or :tf_main:`tf < >`.\n                For example\n\n                .. 
code-block:: python\n\n                    \"activation\": \"relu\" # function name\n                    \"activation\": \"my_module.my_activation_fn\" # module path\n                    \"activation\": my_module.my_activation_fn # function\n\n                Default is `None` which maintains a linear activation.\n\n            \"final_layer_activation\" : str or callable or None\n                The activation function for the final output layer.\n\n            \"other_dense_kwargs\" : dict or None\n                Other keyword arguments to construct each of the output\n                dense layers, e.g., `use_bias`. See\n                :tf_main:`Dense <layers/Dense>` for the keyword arguments.\n\n            \"dropout_layer_ids\" : int or list\n                The indexes of layers (starting from `0`) whose inputs\n                are applied with dropout. The index = :attr:`num_layers`\n                means dropout applies to the final layer output. E.g.,\n\n                .. code-block:: python\n\n                    {\n                        \"num_layers\": 2,\n                        \"dropout_layer_ids\": [0, 2]\n                    }\n\n                will leads to a series of layers as\n                `-dropout-layer0-layer1-dropout-`.\n\n                The dropout mode (training or not) is controlled\n                by the :attr:`mode` argument of :meth:`_build`.\n\n            \"dropout_rate\" : float\n                The dropout rate, between 0 and 1. 
E.g.,\n                `\"dropout_rate\": 0.1` would drop out 10% of elements.\n\n            \"variational_dropout\": bool\n                Whether the dropout mask is the same across all time steps.\n\n        \"name\" : str\n            Name of the encoder\n        \"\"\"\n        hparams = RNNEncoderBase.default_hparams()\n        hparams.update({\n            \"rnn_cell\": layers.default_rnn_cell_hparams(),\n            \"output_layer\": _default_output_layer_hparams(),\n            \"name\": \"unidirectional_rnn_encoder\"\n        })\n        return hparams\n\n    def _build(self,\n               inputs,\n               sequence_length=None,\n               initial_state=None,\n               time_major=False,\n               mode=None,\n               return_cell_output=False,\n               return_output_size=False,\n               **kwargs):\n        \"\"\"Encodes the inputs.\n\n        Args:\n            inputs: A 3D Tensor of shape `[batch_size, max_time, dim]`.\n                The first two dimensions\n                :attr:`batch_size` and :attr:`max_time` are exchanged if\n                :attr:`time_major=True` is specified.\n            sequence_length (optional): A 1D int tensor of shape `[batch_size]`.\n                Sequence lengths\n                of the batch inputs. Used to copy-through state and zero-out\n                outputs when past a batch element's sequence length.\n            initial_state (optional): Initial state of the RNN.\n            time_major (bool): The shape format of the :attr:`inputs` and\n                :attr:`outputs` Tensors. If `True`, these tensors are of shape\n                `[max_time, batch_size, depth]`. If `False` (default),\n                these tensors are of shape `[batch_size, max_time, depth]`.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL`, and `PREDICT`. 
Controls output layer dropout\n                if the output layer is specified with :attr:`hparams`.\n                If `None` (default), :func:`texar.global_mode`\n                is used.\n            return_cell_output (bool): Whether to return the output of the RNN\n                cell. This is the results prior to the output layer.\n            return_output_size (bool): Whether to return the size of the\n                output (i.e., the results after output layers).\n            **kwargs: Optional keyword arguments of\n                :tf_main:`tf.nn.dynamic_rnn <nn/dynamic_rnn>`,\n                such as `swap_memory`, `dtype`, `parallel_iterations`, etc.\n\n        Returns:\n            - By default (both `return_cell_output` and \\\n            `return_output_size` are False), returns a pair \\\n            :attr:`(outputs, final_state)`\n\n                - :attr:`outputs`: The RNN output tensor by the output layer \\\n                (if exists) or the RNN cell (otherwise). The tensor is of \\\n                shape `[batch_size, max_time, output_size]` if \\\n                `time_major` is False, or \\\n                `[max_time, batch_size, output_size]` if \\\n                `time_major` is True. 
\\\n                If RNN cell output is a (nested) tuple of Tensors, then the \\\n                :attr:`outputs` will be a (nested) tuple having the same \\\n                nest structure as the cell output.\n\n                - :attr:`final_state`: The final state of the RNN, which is a \\\n                Tensor of shape `[batch_size] + cell.state_size` or \\\n                a (nested) tuple of Tensors if `cell.state_size` is a (nested)\\\n                tuple.\n\n            - If `return_cell_output` is True, returns a triple \\\n            :attr:`(outputs, final_state, cell_outputs)`\n\n                - :attr:`cell_outputs`: The outputs by the RNN cell prior to \\\n                the \\\n                output layer, having the same structure with :attr:`outputs` \\\n                except for the `output_dim`.\n\n            - If `return_output_size` is `True`, returns a tuple \\\n            :attr:`(outputs, final_state, output_size)`\n\n                - :attr:`output_size`: A (possibly nested tuple of) int \\\n                representing the size of :attr:`outputs`. If a single int or \\\n                an int array, then `outputs` has shape \\\n                `[batch/time, time/batch] + output_size`. 
If \\\n                a (nested) tuple, then `output_size` has the same \\\n                structure as with `outputs`.\n\n            - If both `return_cell_output` and \\\n            `return_output_size` are True, returns \\\n            :attr:`(outputs, final_state, cell_outputs, output_size)`.\n        \"\"\"\n        if ('dtype' not in kwargs) and (initial_state is None):\n            cell_outputs, state = tf.nn.dynamic_rnn(\n                cell=self._cell,\n                inputs=inputs,\n                sequence_length=sequence_length,\n                initial_state=initial_state,\n                time_major=time_major,\n                dtype=tf.float32,\n                **kwargs)\n        else:\n            cell_outputs, state = tf.nn.dynamic_rnn(\n                cell=self._cell,\n                inputs=inputs,\n                sequence_length=sequence_length,\n                initial_state=initial_state,\n                time_major=time_major,\n                **kwargs)\n\n        outputs, output_size = _apply_rnn_encoder_output_layer(\n            self._output_layer, time_major, self._output_layer_hparams,\n            mode, cell_outputs, self._cell.output_size)\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            # Add trainable variables of `self._cell` and `self._output_layer`\n            # which may be constructed externally.\n            self._add_trainable_variable(\n                layers.get_rnn_cell_trainable_variables(self._cell))\n            if self._output_layer and \\\n                    not isinstance(self._output_layer, (list, tuple)):\n                self._add_trainable_variable(\n                    self._output_layer.trainable_variables)\n            self._built = True\n\n        rets = (outputs, state)\n        if return_cell_output:\n            rets += (cell_outputs, )\n        if return_output_size:\n            rets += (output_size, )\n        return rets\n\n    @property\n    
def cell(self):\n        \"\"\"The RNN cell.\n        \"\"\"\n        return self._cell\n\n    @property\n    def state_size(self):\n        \"\"\"The state size of encoder cell.\n\n        Same as :attr:`encoder.cell.state_size`.\n        \"\"\"\n        return self.cell.state_size\n\n    @property\n    def output_layer(self):\n        \"\"\"The output layer.\n        \"\"\"\n        return self._output_layer\n\nclass BidirectionalRNNEncoder(RNNEncoderBase):\n    \"\"\"Bidirectional forward-backward RNN encoder.\n\n    Args:\n        cell_fw (RNNCell, optional): The forward RNN cell. If not given,\n            a cell is created as specified in :attr:`hparams[\"rnn_cell_fw\"]`.\n        cell_bw (RNNCell, optional): The backward RNN cell. If not given,\n            a cell is created as specified in :attr:`hparams[\"rnn_cell_bw\"]`.\n        cell_dropout_mode (optional): A tensor taking value of\n            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, which\n            toggles dropout in the RNN cells (e.g., activates dropout in\n            TRAIN mode). If `None`, :func:`~texar.global_mode()` is\n            used. Ignored if respective cell is given.\n        output_layer_fw (optional): An instance of\n            :tf_main:`tf.layers.Layer <layers/Layer>`. Apply to the forward\n            RNN cell output of each step. If `None` (default), the output\n            layer is created as specified in :attr:`hparams[\"output_layer_fw\"]`.\n        output_layer_bw (optional): An instance of\n            :tf_main:`tf.layers.Layer <layers/Layer>`. Apply to the backward\n            RNN cell output of each step. If `None` (default), the output\n            layer is created as specified in :attr:`hparams[\"output_layer_bw\"]`.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. 
See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    See :meth:`_build` for the inputs and outputs of the encoder.\n\n    Example:\n\n        .. code-block:: python\n\n            # Use with embedder\n            embedder = WordEmbedder(vocab_size, hparams=emb_hparams)\n            encoder = BidirectionalRNNEncoder(hparams=enc_hparams)\n\n            outputs, final_state = encoder(\n                inputs=embedder(data_batch['text_ids']),\n                sequence_length=data_batch['length'])\n            # outputs == (outputs_fw, outputs_bw)\n            # final_state == (final_state_fw, final_state_bw)\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n\n    def __init__(self,\n                 cell_fw=None,\n                 cell_bw=None,\n                 cell_dropout_mode=None,\n                 output_layer_fw=None,\n                 output_layer_bw=None,\n                 hparams=None):\n        RNNEncoderBase.__init__(self, hparams)\n\n        # Make RNN cells\n        with tf.variable_scope(self.variable_scope):\n            if cell_fw is not None:\n                self._cell_fw = cell_fw\n            else:\n                self._cell_fw = layers.get_rnn_cell(\n                    self._hparams.rnn_cell_fw, cell_dropout_mode)\n\n            if cell_bw is not None:\n                self._cell_bw = cell_bw\n            elif self._hparams.rnn_cell_share_config:\n                self._cell_bw = layers.get_rnn_cell(\n                    self._hparams.rnn_cell_fw, cell_dropout_mode)\n            else:\n                self._cell_bw = layers.get_rnn_cell(\n                    self._hparams.rnn_cell_bw, cell_dropout_mode)\n\n        # Make output layers\n        with tf.variable_scope(self.variable_scope):\n            if output_layer_fw is not None:\n                self._output_layer_fw = output_layer_fw\n                self._output_layer_hparams_fw = None\n            
else:\n                self._output_layer_fw = _build_dense_output_layer(\n                    self._hparams.output_layer_fw)\n                self._output_layer_hparams_fw = self._hparams.output_layer_fw\n\n            if output_layer_bw is not None:\n                self._output_layer_bw = output_layer_bw\n                self._output_layer_hparams_bw = None\n            elif self._hparams.output_layer_share_config:\n                self._output_layer_bw = _build_dense_output_layer(\n                    self._hparams.output_layer_fw)\n                self._output_layer_hparams_bw = self._hparams.output_layer_fw\n            else:\n                self._output_layer_bw = _build_dense_output_layer(\n                    self._hparams.output_layer_bw)\n                self._output_layer_hparams_bw = self._hparams.output_layer_bw\n\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"rnn_cell_fw\": default_rnn_cell_hparams(),\n                \"rnn_cell_bw\": default_rnn_cell_hparams(),\n                \"rnn_cell_share_config\": True,\n                \"output_layer_fw\": {\n                    \"num_layers\": 0,\n                    \"layer_size\": 128,\n                    \"activation\": \"identity\",\n                    \"final_layer_activation\": None,\n                    \"other_dense_kwargs\": None,\n                    \"dropout_layer_ids\": [],\n                    \"dropout_rate\": 0.5,\n                    \"variational_dropout\": False\n                },\n                \"output_layer_bw\": {\n                    # Same hyperparams and default values as \"output_layer_fw\"\n                    # ...\n                },\n                \"output_layer_share_config\": True,\n                \"name\": \"bidirectional_rnn_encoder\"\n            }\n\n        Here:\n\n        \"rnn_cell_fw\" : dict\n            
Hyperparameters of the forward RNN cell.\n            Ignored if :attr:`cell_fw` is given to the encoder constructor.\n\n            The default value is defined in\n            :func:`~texar.core.default_rnn_cell_hparams`.\n\n        \"rnn_cell_bw\" : dict\n            Hyperparameters of the backward RNN cell.\n            Ignored if :attr:`cell_bw` is given to the encoder constructor\n            , or if :attr:`\"rnn_cell_share_config\"` is `True`.\n\n            The default value is defined in\n            :meth:`~texar.core.default_rnn_cell_hparams`.\n\n        \"rnn_cell_share_config\" : bool\n            Whether share hyperparameters of the backward cell with the\n            forward cell. Note that the cell parameters (variables) are not\n            shared.\n\n        \"output_layer_fw\" : dict\n            Hyperparameters of the forward output layer. Ignored if\n            :attr:`output_layer_fw` is given to the constructor.\n            See the \"output_layer\" field of\n            :meth:`~texar.modules.UnidirectionalRNNEncoder.default_hparams` for\n            details.\n\n        \"output_layer_bw\" : dict\n            Hyperparameters of the backward output layer. Ignored if\n            :attr:`output_layer_bw` is given to the constructor. Have the\n            same structure and defaults with :attr:`\"output_layer_fw\"`.\n\n            Ignored if :attr:`\"output_layer_share_config\"` is True.\n\n        \"output_layer_share_config\" : bool\n            Whether share hyperparameters of the backward output layer\n            with the forward output layer. 
Note that the layer parameters\n            (variables) are not shared.\n\n        \"name\" : str\n            Name of the encoder\n        \"\"\"\n        hparams = RNNEncoderBase.default_hparams()\n        hparams.update({\n            \"rnn_cell_fw\": layers.default_rnn_cell_hparams(),\n            \"rnn_cell_bw\": layers.default_rnn_cell_hparams(),\n            \"rnn_cell_share_config\": True,\n            \"output_layer_fw\": _default_output_layer_hparams(),\n            \"output_layer_bw\": _default_output_layer_hparams(),\n            \"output_layer_share_config\": True,\n            \"name\": \"bidirectional_rnn_encoder\"\n        })\n        return hparams\n\n    def _build(self,\n               inputs,\n               sequence_length=None,\n               initial_state_fw=None,\n               initial_state_bw=None,\n               time_major=False,\n               mode=None,\n               return_cell_output=False,\n               return_output_size=False,\n               **kwargs):\n        \"\"\"Encodes the inputs.\n\n        Args:\n            inputs: A 3D Tensor of shape `[batch_size, max_time, dim]`.\n                The first two dimensions\n                `batch_size` and `max_time` may be exchanged if\n                `time_major=True` is specified.\n            sequence_length (optional): A 1D int tensor of shape `[batch_size]`.\n                Sequence lengths\n                of the batch inputs. Used to copy-through state and zero-out\n                outputs when past a batch element's sequence length.\n            initial_state (optional): Initial state of the RNN.\n            time_major (bool): The shape format of the :attr:`inputs` and\n                :attr:`outputs` Tensors. If `True`, these tensors are of shape\n                `[max_time, batch_size, depth]`. 
If `False` (default),\n                these tensors are of shape `[batch_size, max_time, depth]`.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL`, and `PREDICT`. Controls output layer dropout\n                if the output layer is specified with :attr:`hparams`.\n                If `None` (default), :func:`texar.global_mode()`\n                is used.\n            return_cell_output (bool): Whether to return the output of the RNN\n                cell. This is the results prior to the output layer.\n            **kwargs: Optional keyword arguments of\n                :tf_main:`tf.nn.dynamic_rnn <nn/dynamic_rnn>`,\n                such as `swap_memory`, `dtype`, `parallel_iterations`, etc.\n\n        Returns:\n            - By default (both `return_cell_output` and `return_output_size` \\\n            are False), returns a pair :attr:`(outputs, final_state)`\n\n                - :attr:`outputs`: A tuple `(outputs_fw, outputs_bw)` \\\n                containing \\\n                the forward and the backward RNN outputs, each of which is of \\\n                shape `[batch_size, max_time, output_dim]` if \\\n                `time_major` is False, or \\\n                `[max_time, batch_size, output_dim]` if \\\n                `time_major` is True. 
\\\n                If RNN cell output is a (nested) tuple of Tensors, then \\\n                `outputs_fw` and `outputs_bw` will be a (nested) tuple having \\\n                the same structure as the cell output.\n\n                - :attr:`final_state`: A tuple \\\n                `(final_state_fw, final_state_bw)` \\\n                containing the final states of the forward and backward \\\n                RNNs, each of which is a \\\n                Tensor of shape `[batch_size] + cell.state_size`, or \\\n                a (nested) tuple of Tensors if `cell.state_size` is a (nested)\\\n                tuple.\n\n            - If `return_cell_output` is True, returns a triple \\\n            :attr:`(outputs, final_state, cell_outputs)` where\n\n                - :attr:`cell_outputs`: A tuple \\\n                `(cell_outputs_fw, cell_outputs_bw)` containting the outputs \\\n                by the forward and backward RNN cells prior to the \\\n                output layers, having the same structure with :attr:`outputs` \\\n                except for the `output_dim`.\n\n            - If `return_output_size` is True, returns a tuple \\\n            :attr:`(outputs, final_state, output_size)` where\n\n                - :attr:`output_size`: A tupple \\\n                `(output_size_fw, output_size_bw)` containing the size of \\\n                `outputs_fw` and `outputs_bw`, respectively. \\\n                Take `*_fw` for example, \\\n                `output_size_fw` is a (possibly nested tuple of) int. \\\n                If a single int or an int array, then `outputs_fw` has shape \\\n                `[batch/time, time/batch] + output_size_fw`. If \\\n                a (nested) tuple, then `output_size_fw` has the same \\\n                structure as with `outputs_fw`. 
The same applies to  \\\n                `output_size_bw`.\n\n            - If both `return_cell_output` and \\\n            `return_output_size` are True, returns \\\n            :attr:`(outputs, final_state, cell_outputs, output_size)`.\n        \"\"\"\n        no_initial_state = initial_state_fw is None and initial_state_bw is None\n        if ('dtype' not in kwargs) and no_initial_state:\n            cell_outputs, states = tf.nn.bidirectional_dynamic_rnn(\n                cell_fw=self._cell_fw,\n                cell_bw=self._cell_bw,\n                inputs=inputs,\n                sequence_length=sequence_length,\n                initial_state_fw=initial_state_fw,\n                initial_state_bw=initial_state_bw,\n                time_major=time_major,\n                dtype=tf.float32,\n                **kwargs)\n        else:\n            cell_outputs, states = tf.nn.bidirectional_dynamic_rnn(\n                cell_fw=self._cell_fw,\n                cell_bw=self._cell_bw,\n                inputs=inputs,\n                sequence_length=sequence_length,\n                initial_state_fw=initial_state_fw,\n                initial_state_bw=initial_state_bw,\n                time_major=time_major,\n                **kwargs)\n\n        outputs_fw, output_size_fw = _apply_rnn_encoder_output_layer(\n            self._output_layer_fw, time_major, self._output_layer_hparams_fw,\n            mode, cell_outputs[0], self._cell_fw.output_size)\n\n        outputs_bw, output_size_bw = _apply_rnn_encoder_output_layer(\n            self._output_layer_bw, time_major, self._output_layer_hparams_bw,\n            mode, cell_outputs[1], self._cell_bw.output_size)\n\n        outputs = (outputs_fw, outputs_bw)\n        output_size = (output_size_fw, output_size_bw)\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            # Add trainable variables of cells and output layers\n            # which may be constructed externally.\n            
self._add_trainable_variable(\n                layers.get_rnn_cell_trainable_variables(self._cell_fw))\n            self._add_trainable_variable(\n                layers.get_rnn_cell_trainable_variables(self._cell_bw))\n            if self._output_layer_fw and \\\n                    not isinstance(self._output_layer_fw, (list, tuple)):\n                self._add_trainable_variable(\n                    self._output_layer_fw.trainable_variables)\n            if self._output_layer_bw and \\\n                    not isinstance(self._output_layer_bw, (list, tuple)):\n                self._add_trainable_variable(\n                    self._output_layer_bw.trainable_variables)\n            self._built = True\n\n        returns = (outputs, states)\n        if return_cell_output:\n            returns += (cell_outputs, )\n        if return_output_size:\n            returns += (output_size, )\n        return returns\n\n    @property\n    def cell_fw(self):\n        \"\"\"The forward RNN cell.\n        \"\"\"\n        return self._cell_fw\n\n    @property\n    def cell_bw(self):\n        \"\"\"The backward RNN cell.\n        \"\"\"\n        return self._cell_bw\n\n    @property\n    def state_size_fw(self):\n        \"\"\"The state size of the forward encoder cell.\n\n        Same as :attr:`encoder.cell_fw.state_size`.\n        \"\"\"\n        return self.cell_fw.state_size\n\n    @property\n    def state_size_bw(self):\n        \"\"\"The state size of the backward encoder cell.\n\n        Same as :attr:`encoder.cell_bw.state_size`.\n        \"\"\"\n        return self.cell_bw.state_size\n\n    @property\n    def output_layer_fw(self):\n        \"\"\"The output layer of the forward RNN.\n        \"\"\"\n        return self._output_layer_fw\n\n    @property\n    def output_layer_bw(self):\n        \"\"\"The output layer of the backward RNN.\n        \"\"\"\n        return self._output_layer_bw\n"
  },
  {
    "path": "texar_repo/texar/modules/encoders/rnn_encoders_test.py",
    "content": "#\n\"\"\"\nUnit tests for RNN encoders.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nfrom texar.modules.encoders.rnn_encoders import UnidirectionalRNNEncoder\nfrom texar.modules.encoders.rnn_encoders import BidirectionalRNNEncoder\n#from texar.modules.encoders.rnn_encoders import HierarchicalForwardRNNEncoder\nfrom texar.modules.embedders.embedders import WordEmbedder\n\n# pylint: disable=too-many-locals\n\nclass UnidirectionalRNNEncoderTest(tf.test.TestCase):\n    \"\"\"Tests :class:`~texar.modules.UnidirectionalRNNEncoder` class.\n    \"\"\"\n\n    def test_trainable_variables(self):\n        \"\"\"Tests the functionality of automatically collecting trainable\n        variables.\n        \"\"\"\n        inputs = tf.placeholder(dtype=tf.float32, shape=[None, None, 100])\n\n        # case 1\n        encoder = UnidirectionalRNNEncoder()\n        _, _ = encoder(inputs)\n        self.assertEqual(len(encoder.trainable_variables), 2)\n\n        # case 2\n        hparams = {\n            \"rnn_cell\": {\n                \"dropout\": {\n                    \"input_keep_prob\": 0.5\n                }\n            }\n        }\n        encoder = UnidirectionalRNNEncoder(hparams=hparams)\n        _, _ = encoder(inputs)\n        self.assertEqual(len(encoder.trainable_variables), 2)\n\n        # case 3\n        hparams = {\n            \"output_layer\": {\n                \"num_layers\": 2,\n                \"layer_size\": [100, 6],\n                \"activation\": \"relu\",\n                \"final_layer_activation\": \"identity\",\n                \"dropout_layer_ids\": [0, 1, 2],\n                \"variational_dropout\": False\n            }\n        }\n        encoder = UnidirectionalRNNEncoder(hparams=hparams)\n        _, _ = encoder(inputs)\n        self.assertEqual(len(encoder.trainable_variables), 2+2+2)\n   
     _, _ = encoder(inputs)\n        self.assertEqual(len(encoder.trainable_variables), 2+2+2)\n\n    def test_encode(self):\n        \"\"\"Tests encoding.\n        \"\"\"\n        # case 1\n        encoder = UnidirectionalRNNEncoder()\n\n        max_time = 8\n        batch_size = 16\n        emb_dim = 100\n        inputs = tf.random_uniform([batch_size, max_time, emb_dim],\n                                   maxval=1., dtype=tf.float32)\n        outputs, state = encoder(inputs)\n\n        cell_dim = encoder.hparams.rnn_cell.kwargs.num_units\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_, state_ = sess.run([outputs, state])\n            self.assertEqual(outputs_.shape, (batch_size, max_time, cell_dim))\n            self.assertEqual(state_[0].shape, (batch_size, cell_dim))\n\n        # case 2: with output layers\n        hparams = {\n            \"output_layer\": {\n                \"num_layers\": 2,\n                \"layer_size\": [100, 6],\n                \"dropout_layer_ids\": [0, 1, 2],\n                \"variational_dropout\": True\n            }\n        }\n        encoder = UnidirectionalRNNEncoder(hparams=hparams)\n\n        max_time = 8\n        batch_size = 16\n        emb_dim = 100\n        inputs = tf.random_uniform([batch_size, max_time, emb_dim],\n                                   maxval=1., dtype=tf.float32)\n        outputs, state, cell_outputs, output_size = encoder(\n            inputs, return_cell_output=True, return_output_size=True)\n\n        self.assertEqual(output_size[0], 6)\n        self.assertEqual(cell_outputs.shape[-1], encoder.cell.output_size)\n\n        out_dim = encoder.hparams.output_layer.layer_size[-1]\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_ = sess.run(outputs)\n            self.assertEqual(outputs_.shape, (batch_size, max_time, out_dim))\n\n\n    def 
test_encode_with_embedder(self):\n        \"\"\"Tests encoding companioned with :mod:`texar.modules.embedders`.\n        \"\"\"\n        embedder = WordEmbedder(vocab_size=20, hparams={\"dim\": 100})\n        inputs = tf.ones([64, 16], dtype=tf.int32)\n\n        encoder = UnidirectionalRNNEncoder()\n        outputs, state = encoder(embedder(inputs))\n\n        cell_dim = encoder.hparams.rnn_cell.kwargs.num_units\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_, state_ = sess.run([outputs, state])\n            self.assertEqual(outputs_.shape, (64, 16, cell_dim))\n            self.assertEqual(state_[0].shape, (64, cell_dim))\n\nclass BidirectionalRNNEncoderTest(tf.test.TestCase):\n    \"\"\"Tests :class:`~texar.modules.BidirectionalRNNEncoder` class.\n    \"\"\"\n\n    def test_trainable_variables(self):\n        \"\"\"Tests the functionality of automatically collecting trainable\n        variables.\n        \"\"\"\n        inputs = tf.placeholder(dtype=tf.float32, shape=[None, None, 100])\n\n        # case 1\n        encoder = BidirectionalRNNEncoder()\n        _, _ = encoder(inputs)\n        self.assertEqual(len(encoder.trainable_variables), 4)\n\n        # case 2\n        hparams = {\n            \"rnn_cell_fw\": {\n                \"dropout\": {\n                    \"input_keep_prob\": 0.5\n                }\n            }\n        }\n        encoder = BidirectionalRNNEncoder(hparams=hparams)\n        _, _ = encoder(inputs)\n        self.assertEqual(len(encoder.trainable_variables), 4)\n\n        # case 3\n        hparams = {\n            \"output_layer_fw\": {\n                \"num_layers\": 2,\n                \"layer_size\": [100, 6],\n                \"activation\": \"relu\",\n                \"final_layer_activation\": \"identity\",\n                \"dropout_layer_ids\": [0, 1, 2],\n                \"variational_dropout\": False\n            },\n            \"output_layer_bw\": {\n   
             \"num_layers\": 3,\n                \"other_dense_kwargs\": {\"use_bias\": False}\n            },\n            \"output_layer_share_config\": False\n        }\n        encoder = BidirectionalRNNEncoder(hparams=hparams)\n        _, _ = encoder(inputs)\n        self.assertEqual(len(encoder.trainable_variables), 4+4+3)\n        _, _ = encoder(inputs)\n        self.assertEqual(len(encoder.trainable_variables), 4+4+3)\n\n    def test_encode(self):\n        \"\"\"Tests encoding.\n        \"\"\"\n        # case 1\n        encoder = BidirectionalRNNEncoder()\n\n        max_time = 8\n        batch_size = 16\n        emb_dim = 100\n        inputs = tf.random_uniform([batch_size, max_time, emb_dim],\n                                   maxval=1., dtype=tf.float32)\n        outputs, state = encoder(inputs)\n\n        cell_dim = encoder.hparams.rnn_cell_fw.kwargs.num_units\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_, state_ = sess.run([outputs, state])\n            self.assertEqual(outputs_[0].shape,\n                             (batch_size, max_time, cell_dim))\n            self.assertEqual(state_[0][0].shape, (batch_size, cell_dim))\n\n        # case 2: with output layers\n        hparams = {\n            \"output_layer_fw\": {\n                \"num_layers\": 2,\n                \"layer_size\": [100, 6],\n                \"dropout_layer_ids\": [0, 1, 2],\n                \"variational_dropout\": True\n            }\n        }\n        encoder = BidirectionalRNNEncoder(hparams=hparams)\n\n        max_time = 8\n        batch_size = 16\n        emb_dim = 100\n        inputs = tf.random_uniform([batch_size, max_time, emb_dim],\n                                   maxval=1., dtype=tf.float32)\n        outputs, state, cell_outputs, output_size = encoder(\n            inputs, return_cell_output=True, return_output_size=True)\n\n        self.assertEqual(output_size[0][0], 6)\n        
self.assertEqual(output_size[1][0], 6)\n        self.assertEqual(cell_outputs[0].shape[-1], encoder.cell_fw.output_size)\n        self.assertEqual(cell_outputs[1].shape[-1], encoder.cell_bw.output_size)\n\n        out_dim = encoder.hparams.output_layer_fw.layer_size[-1]\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            outputs_ = sess.run(outputs)\n            self.assertEqual(outputs_[0].shape, (batch_size, max_time, out_dim))\n            self.assertEqual(outputs_[1].shape, (batch_size, max_time, out_dim))\n\n# TODO(zhiting): not completed yet\n#class HierarchicalForwardRNNEncoderTest(tf.test.TestCase):\n#    \"\"\"Tests HierarchicalForwardRNNEncoder class.\n#    \"\"\"\n#\n#    def test_trainable_variables(self):\n#        \"\"\"Tests the functionality of automatically collecting trainable\n#        variables.\n#        \"\"\"\n#        encoder = HierarchicalForwardRNNEncoder(vocab_size=2)\n#        inputs = [[[1, 0], [0, 1], [0, 1]]]\n#        _, _ = encoder(inputs)\n#        self.assertEqual(len(encoder.trainable_variables), 5)\n#\n#    def test_encode(self):\n#        \"\"\"Tests encoding.\n#        \"\"\"\n#        vocab_size = 4\n#        encoder = HierarchicalForwardRNNEncoder(vocab_size=vocab_size)\n#\n#        max_major_time = 8\n#        max_minor_time = 6\n#        batch_size = 16\n#        inputs = tf.random_uniform([batch_size, max_major_time, max_minor_time],\n#                                   maxval=vocab_size,\n#                                   dtype=tf.int32)\n#        outputs, state = encoder(inputs)\n#\n#        cell_dim = encoder.hparams.rnn_cell.kwargs.num_units\n#        with self.test_session() as sess:\n#            sess.run(tf.global_variables_initializer())\n#            outputs_, state_ = sess.run([outputs, state])\n#            self.assertEqual(outputs_.shape, (batch_size, max_major_time, cell_dim))\n#            self.assertEqual(state_[0].shape, (batch_size, 
cell_dim))\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/encoders/transformer_encoders.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nTransformer encoders with multihead self attention.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.core import layers\nfrom texar.utils import transformer_attentions as attn\nfrom texar.modules.embedders.position_embedders import\\\n    SinusoidsPositionEmbedder, PositionEmbedder\nfrom texar.modules.encoders.encoder_base import EncoderBase\nfrom texar.modules.encoders.multihead_attention import MultiheadAttentionEncoder\nfrom texar.modules.networks.networks import FeedForwardNetwork\nfrom texar import utils\nfrom texar.utils.shapes import shape_list, mask_sequences\nfrom texar.utils.mode import is_train_mode\n\n# pylint: disable=too-many-locals, invalid-name\n\n__all__ = [\n    \"default_transformer_poswise_net_hparams\",\n    \"TransformerEncoder\"\n]\n\ndef default_transformer_poswise_net_hparams(output_dim=512):\n    \"\"\"Returns default hyperparameters of a\n    :class:`~texar.modules.FeedForwardNetwork` as a pos-wise network used\n    in :class:`~texar.modules.TransformerEncoder` and\n    :class:`~texar.modules.TransformerDecoder`.\n\n    This is a 2-layer dense network with dropout in-between.\n\n    .. 
code-block:: python\n\n        {\n            \"layers\": [\n                {\n                    \"type\": \"Dense\",\n                    \"kwargs\": {\n                        \"name\": \"conv1\",\n                        \"units\": output_dim*4,\n                        \"activation\": \"relu\",\n                        \"use_bias\": True,\n                    }\n                },\n                {\n                    \"type\": \"Dropout\",\n                    \"kwargs\": {\n                        \"rate\": 0.1,\n                    }\n                },\n                {\n                    \"type\": \"Dense\",\n                    \"kwargs\": {\n                        \"name\": \"conv2\",\n                        \"units\": output_dim,\n                        \"use_bias\": True,\n                    }\n                }\n            ],\n            \"name\": \"ffn\"\n        }\n\n    Args:\n        output_dim (int): The size of output dense layer.\n    \"\"\"\n    return {\n        \"layers\": [\n            {\n                \"type\": \"Dense\",\n                \"kwargs\": {\n                    \"name\": \"conv1\",\n                    \"units\": output_dim*4,\n                    \"activation\": \"relu\",\n                    \"use_bias\": True,\n                }\n            },\n            {\n                \"type\": \"Dropout\",\n                \"kwargs\": {\n                    \"rate\": 0.1,\n                }\n            },\n            {\n                \"type\": \"Dense\",\n                \"kwargs\": {\n                    \"name\": \"conv2\",\n                    \"units\": output_dim,\n                    \"use_bias\": True,\n                }\n            }\n        ],\n        \"name\": \"ffn\"\n    }\n\n\nclass TransformerEncoder(EncoderBase):\n    \"\"\"Transformer encoder that applies multi-head self attention for encoding\n    sequences.\n    Stacked `~texar.modules.encoders.MultiheadAttentionEncoder`,\n    
`~texar.modules.FeedForwardNetwork` and residual connections.\n    Args:\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n    def __init__(self, hparams=None):\n        EncoderBase.__init__(self, hparams)\n\n        with tf.variable_scope(self.variable_scope):\n            if self._hparams.initializer:\n                tf.get_variable_scope().set_initializer(\n                    layers.get_initializer(self._hparams.initializer))\n            if self._hparams.position_embedder_type == 'sinusoids':\n                self.position_embedder = SinusoidsPositionEmbedder(\n                    self._hparams.position_embedder_hparams)\n            else:\n                self.position_embedder = PositionEmbedder(\n                    position_size=self._hparams.position_size,\n                    hparams=self._hparams.position_embedder_hparams)\n            # pylint: disable=protected-access\n            if self._hparams.dim != \\\n                self.position_embedder._hparams.dim:\n                raise ValueError('\"dim\" in '\n                                 'TransformerEncoder hparams must be equal '\n                                 'to \"dim\" in its '\n                                 'position_embedder_hparams.')\n\n            self.multihead_attention_list = []\n            self.poswise_networks = []\n            for i in range(self._hparams.num_blocks):\n                with tf.variable_scope(\"layer_{}\".format(i)):\n                    with tf.variable_scope('attention'):\n                        multihead_attention = MultiheadAttentionEncoder(\n                            self._hparams.multihead_attention)\n                        self.multihead_attention_list.append(\n                            
multihead_attention)\n                    # pylint: disable=protected-access\n                    if self._hparams.dim != \\\n                        multihead_attention._hparams.output_dim:\n                        raise ValueError('The \"dim\" in the hparams of'\n                                         'multihead_attention should be equal'\n                                         'to the \"dim\" of TransformerEncoder')\n                    poswise_network = FeedForwardNetwork(\n                        hparams=self._hparams['poswise_feedforward'])\n                    # pylint: disable=protected-access\n                    if self._hparams.dim != \\\n                        poswise_network._hparams.layers[-1]['kwargs']['units']:\n                        # poswise_network._hparams.layers[-1]['units']:\n                        raise ValueError('The \"units\" in the \"kwargs\" of'\n                                         'FeedForwardNetwork should be equal'\n                                         'to the \"dim\" of TransformerEncoder')\n                    self.poswise_networks.append(poswise_network)\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                \"num_blocks\": 6,\n                \"dim\": 512,\n                'position_embedder_type': 'sinusoids',\n                'position_size': None,\n                'position_embedder_hparams': None,\n                \"embedding_dropout\": 0.1,\n                \"residual_dropout\": 0.1,\n                \"poswise_feedforward\": default_transformer_poswise_net_hparams,\n                'multihead_attention': {\n                    'name': 'multihead_attention',\n                    'num_units': 512,\n                    'num_heads': 8,\n                    'dropout_rate': 0.1,\n                    'output_dim': 512,\n                    'use_bias': False,\n                },\n                \"initializer\": None,\n                \"name\": \"transformer_encoder\",\n                'use_bert_config': False,\n            }\n\n        Here:\n\n        \"num_blocks\" : int\n            Number of stacked blocks.\n\n        \"dim\" : int\n            Hidden dimension of the encoders.\n\n        \"use_bert_config\": bool\n            If False, apply the default Transformer Encoder architecture.\n            If True, apply the Transformer Encoder architecture used in BERT.\n            The differences lie in:\n                1. The Normalization of the input embedding with dimension\n                2. The attention bias for padding tokens.\n                3. 
The residual connections between the internal tensors.\n\n        \"position_embedder_type\":\n            Choose from \"sinusoids\" or \"variables\".\n\n            \"sinusoids\":\n                create the position embedding as sinusoids, which is fixed.\n            \"variables\":\n                create the position embedding as trainable variables.\n\n        \"position_size\": int\n            The size of position embeddings.\n            Only be used when \"position_embedder_type\" is \"variables\".\n\n        \"position_embedder_hparams\" : dict, optional\n            Hyperparameters of a\n            :class:`~texar.modules.PositionEmbedder` as position\n            embedder if \"position_embedder_type\" is \"variables\",\n            or Hyperparameters of a\n            :class:`~texar.modules.SinusoidsPositionEmbedder` as position\n            embedder if \"position_embedder_type\" is \"sinusoids\".\n\n        \"embedding_dropout\" : float\n            Dropout rate of the input word and position embeddings.\n\n        \"residual_dropout\" :  float\n            Dropout rate of the residual connections.\n\n        \"poswise_feedforward\" : dict,\n            Hyperparameters for a feed-forward network used in residual\n            connections.\n            Make sure the dimension of the output tensor is equal to `dim`.\n\n            See :func:`~texar.modules.default_transformer_poswise_net_hparams`\n            for details.\n\n        \"multihead_attention\": dict,\n            Hyperparameters for the multihead attention strategy.\n            Make sure the \"output_dim\" in this module is equal to \"dim\".\n            See :func:\n                `~texar.modules.encoder.MultiheadAttentionEncoder.\n                default_hparams` for details.\n\n        \"initializer\" : dict, optional\n            Hyperparameters of the default initializer that initializes\n            variables created in this module.\n            See :func:`~texar.core.get_initializer` 
for details.\n\n        \"name\" : str\n            Name of the module.\n        \"\"\"\n        return {\n            'num_blocks': 6,\n            'dim': 512,\n            'use_bert_config': False,\n            'position_embedder_type': 'sinusoids',\n            'position_size': None,\n            'position_embedder_hparams': None,\n            'embedding_dropout': 0.1,\n            'residual_dropout': 0.1,\n            'poswise_feedforward': default_transformer_poswise_net_hparams(),\n            'multihead_attention': {\n                'name': 'multihead_attention',\n                'num_units': 512,\n                'num_heads': 8,\n                'dropout_rate': 0.1,\n                'output_dim': 512,\n                'use_bias': False,\n            },\n            'initializer': None,\n            'name': 'transformer_encoder',\n        }\n\n    # pylint: disable=arguments-differ, too-many-branches, too-many-statements\n    def _build(self, inputs, sequence_length, mode=None):\n        \"\"\"Encodes the inputs.\n\n        Args:\n            inputs: A 3D Tensor of shape `[batch_size, max_time, dim]`,\n                containing the word embeddings of input sequences. Note that\n                the embedding dimension `dim` must equal \"dim\" in\n                :attr:`hparams`.\n            sequence_length: A 1D Tensor of shape `[batch_size]`. Input tokens\n                beyond respective sequence lengths are masked out\n                automatically.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`,\n                including `TRAIN`, `EVAL`, and `PREDICT`. 
Used to toggle\n                dropout.\n                If `None` (default), :func:`texar.global_mode` is used.\n\n        Returns:\n            A Tensor of shape `[batch_size, max_time, dim]` containing the\n            encoded vectors.\n        \"\"\"\n        # Multiply input embedding with the sqrt of its dimension for\n        # normalization\n        if not self._hparams.use_bert_config:\n            inputs = inputs * self._hparams.dim**0.5\n            inputs = mask_sequences(inputs, sequence_length, tensor_rank=3)\n        _, lengths, _ = shape_list(inputs)\n\n        inputs_padding = 1 - tf.sequence_mask(\n            sequence_length, tf.shape(inputs)[1], dtype=tf.float32)\n        if self._hparams.use_bert_config:\n            ignore_padding = attn.attention_bias_ignore_padding(\n                inputs_padding, bias_value=-1e4)\n        else:\n            ignore_padding = attn.attention_bias_ignore_padding(\n                inputs_padding)\n\n        encoder_self_attention_bias = ignore_padding\n\n        positions = tf.expand_dims(tf.range(lengths, dtype=tf.int32), 0)\n        pos_embeds = self.position_embedder(positions)\n\n        input_embedding = inputs + pos_embeds\n\n        if self._hparams.use_bert_config:\n            x = layers.layer_normalize(input_embedding)\n            x = tf.layers.dropout(x,\n                                  rate=self._hparams.embedding_dropout,\n                                  training=is_train_mode(mode))\n        else:\n            x = tf.layers.dropout(input_embedding,\n                                  rate=self._hparams.embedding_dropout,\n                                  training=is_train_mode(mode))\n\n        # Just to keep consistent with BERT, actually makes no difference\n        if self._hparams.use_bert_config:\n            pad_remover = None\n        else:\n            pad_remover = utils.transformer_utils.PadRemover(inputs_padding)\n\n        for i in range(self._hparams.num_blocks):\n            
with tf.variable_scope(\"layer_{}\".format(i)):\n                multihead_attention = self.multihead_attention_list[i]\n                # trivial difference between BERT and original Transformer\n                if self._hparams.use_bert_config:\n                    _queries_input = x\n                else:\n                    _queries_input = layers.layer_normalize(x)\n\n                attention_output = multihead_attention(\n                    queries=_queries_input,\n                    memory=_queries_input,\n                    memory_attention_bias=encoder_self_attention_bias,\n                    mode=mode,\n                )\n                attention_output = tf.layers.dropout(\n                    attention_output,\n                    rate=self._hparams.residual_dropout,\n                    training=is_train_mode(mode),\n                )\n                x = x + attention_output\n                with tf.variable_scope('output'):\n                    if self._hparams.use_bert_config:\n                        x = layers.layer_normalize(x)\n                        y = x\n                    else:\n                        y = layers.layer_normalize(x)\n                poswise_network = self.poswise_networks[i]\n                with tf.variable_scope(poswise_network.variable_scope):\n                    original_shape = shape_list(y)\n                    y = tf.reshape(y, [-1, self._hparams.dim])\n                    if pad_remover:\n                        y = tf.expand_dims(pad_remover.remove(y), axis=0)\n                        # [1, batch_size*seq_length, hidden_dim]\n                    layer_output = poswise_network(y, mode=mode)\n                    sub_output = tf.layers.dropout(\n                        layer_output,\n                        rate=self._hparams.residual_dropout,\n                        training=is_train_mode(mode)\n                    )\n                    if pad_remover:\n                        sub_output = 
tf.reshape(pad_remover.restore(tf.squeeze(\\\n                            sub_output, axis=0)), original_shape \\\n                        )\n                    else:\n                        sub_output = tf.reshape(sub_output, original_shape)\n\n                    x = x + sub_output\n                    if self._hparams.use_bert_config:\n                        x = layers.layer_normalize(x)\n\n        if not self._hparams.use_bert_config:\n            x = layers.layer_normalize(x)\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            self._built = True\n\n        return x\n"
  },
  {
    "path": "texar_repo/texar/modules/memory/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nMemory modules.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.modules.memory.memory_network import *\nfrom texar.modules.memory.embed_fns import *\n"
  },
  {
    "path": "texar_repo/texar/modules/memory/embed_fns.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nSome embed_fn s used in :class:`~texar.modules.memory.MemNetBase` and its\nsubclasses.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=invalid-name, too-many-arguments\n\n__all__ = [\n    'default_memnet_embed_fn_hparams',\n]\n\ndef default_memnet_embed_fn_hparams():\n    \"\"\"Returns a dictionary of hyperparameters with default hparams for\n    :func:`~texar.modules.memory.default_embed_fn`\n\n    .. code-block:: python\n\n        {\n            \"embedding\": {\n                \"dim\": 100\n            },\n            \"temporal_embedding\": {\n                \"dim\": 100\n            },\n            \"combine_mode\": \"add\"\n        }\n\n    Here:\n\n    \"embedding\" : dict, optional\n        Hyperparameters for embedding operations. See\n        :meth:`~texar.modules.WordEmbedder.default_hparams` of\n        :class:`~texar.modules.WordEmbedder` for details. If `None`, the\n        default hyperparameters are used.\n\n    \"temporal_embedding\" : dict, optional\n        Hyperparameters for temporal embedding operations. See\n        :meth:`~texar.modules.PositionEmbedder.default_hparams` of\n        :class:`~texar.modules.PositionEmbedder` for details. 
If `None`, the\n        default hyperparameters are used.\n\n    \"combine_mode\" : str\n        Either **'add'** or **'concat'**. If 'add', memory\n        embedding and temporal embedding are added up. In this case the two\n        embedders must have the same dimension. If 'concat', the two\n        embeddings are concatenated.\n    \"\"\"\n    return {\n        \"embedding\": {\n            \"dim\": 100\n        },\n        \"temporal_embedding\": {\n            \"dim\": 100\n        },\n        \"combine_mode\": \"add\"\n    }\n\n"
  },
  {
    "path": "texar_repo/texar/modules/memory/memory_network.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nEnd-to-end memory network described in\n(Sukhbaatar et al.) End-To-End Memory Networks\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.module_base import ModuleBase\nfrom texar.modules.embedders import WordEmbedder, PositionEmbedder\nfrom texar.utils.mode import switch_dropout\nfrom texar.modules.memory.embed_fns import default_memnet_embed_fn_hparams\n\n# pylint: disable=invalid-name, too-many-instance-attributes, too-many-arguments\n# pylint: disable=too-many-locals\n\n__all__ = [\n    'MemNetBase',\n    'MemNetRNNLike',\n]\n\nclass MemNetSingleLayer(ModuleBase):\n    \"\"\"An A-C layer for memory network.\n\n    Args:\n        H (optional): The matrix :attr:`H` multiplied to :attr:`o` at the end.\n        hparams (dict or HParams, optional): Memory network single layer\n            hyperparameters. If it is not specified, the default hyperparameter\n            setting is used. See :attr:`default_hparams` for the structure and\n            default values.\n    \"\"\"\n\n    def __init__(self, H=None, hparams=None):\n        ModuleBase.__init__(self, hparams)\n\n        self._H = H\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                \"name\": \"memnet_single_layer\"\n            }\n\n        Here:\n\n        \"name\": str\n            Name of the memory network single layer.\n        \"\"\"\n        return {\n            \"name\": \"memnet_single_layer\"\n        }\n\n    def _build(self, u, m, c, **kwargs):\n        \"\"\"An A-C operation with memory and query vector.\n\n        Args:\n            u (Tensor): The input query `Tensor` of shape `[None, memory_dim]`.\n            m (Tensor): Output of A operation. Should be in shape\n                `[None, memory_size, memory_dim]`.\n            c (Tensor): Output of C operation. Should be in shape\n                `[None, memory_size, memory_dim]`.\n\n        Returns:\n            A `Tensor` of shape same as :attr:`u`.\n        \"\"\"\n        # Input memory representation\n        p = tf.matmul(m, tf.expand_dims(u, axis=2))\n        p = tf.transpose(p, perm=[0, 2, 1])\n\n        p = tf.nn.softmax(p) # equ. (1)\n\n        # Output memory representation\n        o = tf.matmul(p, c) # equ. (2)\n        o = tf.squeeze(o, axis=[1])\n\n        if self._H:\n            u = tf.matmul(u, self._H) # RNN-like style\n        u_ = tf.add(u, o) # u^{k+1} = H u^k + o^k\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            if self._H:\n                self._add_trainable_variable(self._H)\n            self._built = True\n\n        return u_\n\nclass MemNetBase(ModuleBase):\n    \"\"\"Base class inherited by all memory network classes.\n\n    Args:\n        raw_memory_dim (int): Dimension size of raw memory entries\n            (before embedding). For example,\n            if a raw memory entry is a word, this is the **vocabulary size**\n            (imagine a one-hot representation of word). 
If a raw memory entry\n            is a dense vector, this is the dimension size of the vector.\n        input_embed_fn (optional): A callable that embeds raw memory entries\n            as inputs.\n            This corresponds to the `A` embedding operation in\n            (Sukhbaatar et al.)\n            If not provided, a default embedding operation is created as\n            specified in :attr:`hparams`. See\n            :meth:`~texar.modules.MemNetBase.get_default_embed_fn`\n            for details.\n        output_embed_fn (optional): A callable that embeds raw memory entries\n            as outputs.\n            This corresponds to the `C` embedding operation in\n            (Sukhbaatar et al.)\n            If not provided, a default embedding operation is created as\n            specified in :attr:`hparams`. See\n            :meth:`~texar.modules.MemNetBase.get_default_embed_fn`\n            for details.\n        query_embed_fn (optional): A callable that embeds query.\n            This corresponds to the `B` embedding operation in\n            (Sukhbaatar et al.). If not provided and \"use_B\" is True\n            in :attr:`hparams`, a default embedding operation is created as\n            specified in :attr:`hparams`. See\n            :meth:`~texar.modules.MemNetBase.get_default_embed_fn`\n            for details.\n            Notice: If you'd like to customize this callable, please follow\n            the same number and style of dimensions as in `input_embed_fn` or\n            `output_embed_fn`, and assume that the 2nd dimension of its\n            input and output (which corresponds to `memory_size`) is 1.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. 
See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n    \"\"\"\n\n    def __init__(self,\n                 raw_memory_dim,\n                 input_embed_fn=None,\n                 output_embed_fn=None,\n                 query_embed_fn=None,\n                 hparams=None):\n        ModuleBase.__init__(self, hparams)\n\n        self._raw_memory_dim = raw_memory_dim\n\n        self._n_hops = self._hparams.n_hops\n        self._relu_dim = self._hparams.relu_dim\n        self._memory_size = self._hparams.memory_size\n\n        with tf.variable_scope(self.variable_scope):\n            self._A, self._C, self._B, self._memory_dim = self._build_embed_fn(\n                input_embed_fn, output_embed_fn, query_embed_fn)\n\n            self.H = None\n            if self.hparams.use_H:\n                self.H = tf.get_variable(\n                    name=\"H\", shape=[self._memory_dim, self._memory_dim])\n\n    def _build_embed_fn(self, input_embed_fn, output_embed_fn, query_embed_fn):\n        # Optionally creates embed_fn's\n        memory_dim = self.hparams.memory_dim\n        mdim_A, mdim_C, mdim_B = None, None, None\n\n        A = input_embed_fn\n        if input_embed_fn is None:\n            A, mdim_A = self.get_default_embed_fn(\n                self._memory_size, self._hparams.A)\n            memory_dim = mdim_A\n\n        C = output_embed_fn\n        if output_embed_fn is None:\n            C, mdim_C = self.get_default_embed_fn(\n                self._memory_size, self._hparams.C)\n            if mdim_A is not None and mdim_A != mdim_C:\n                raise ValueError('Embedding config `A` and `C` must have '\n                                 'the same output dimension.')\n            memory_dim = mdim_C\n\n        B = query_embed_fn\n        if query_embed_fn is None and self._hparams.use_B:\n            B, mdim_B = self.get_default_embed_fn(1, self._hparams.B)\n            if mdim_A is not None and mdim_A != 
mdim_B:\n                raise ValueError('Embedding config `A` and `B` must have '\n                                 'the same output dimension.')\n            if mdim_C is not None and mdim_C != mdim_B:\n                raise ValueError('Embedding config `C` and `B` must have '\n                                 'the same output dimension.')\n            memory_dim = mdim_B\n\n        return A, C, B, memory_dim\n\n\n    def get_default_embed_fn(self, memory_size, embed_fn_hparams):\n        \"\"\"Creates a default embedding function. Can be used for A, C, or B\n        operation.\n\n        For B operation (i.e., query_embed_fn), :attr:`memory_size` must be 1.\n\n        The function is a combination of both memory embedding and temporal\n        embedding, with the combination method specified by \"combine_mode\" in\n        the `embed_fn_hparams`.\n\n        .. role:: python(code)\n           :language: python\n\n        Args:\n            embed_fn_hparams (dict or HParams): Hyperparameter of the\n                embedding function. See\n                :func:`~texar.modules.default_memnet_embed_fn` for details.\n\n        Returns:\n            A tuple `(embed_fn, memory_dim)`, where\n\n            - **`memory_dim`** is the dimension of memory entry embedding, \\\n            inferred from :attr:`embed_fn_hparams`.\n\n                - If `combine_mode` == 'add', `memory_dim` is the \\\n                embedder dimension.\n                - If `combine_mode` == 'concat', `memory_dim` is the sum \\\n                of the memory embedder dimension and the temporal embedder \\\n                dimension.\n\n            - **`embed_fn`** is an embedding function that takes in memory \\\n            and returns memory embedding. 
\\\n            Specifically, the function has signature \\\n            :python:`memory_embedding= embed_fn(memory=None, soft_memory=None)`\\\n            where one of `memory` and `soft_memory` is provided (but not both).\n\n            Args:\n                memory: An `int` Tensor of shape\n                    `[batch_size, memory_size]`\n                    containing memory indexes used for embedding lookup.\n                soft_memory: A Tensor of shape\n                    `[batch_size, memory_size, raw_memory_dim]`\n                    containing soft weights used to mix the embedding vectors.\n\n            Returns:\n                A Tensor of shape `[batch_size, memory_size, memory_dim]`\n                containing the memory entry embeddings.\n\n        \"\"\"\n        # memory embedder\n        embedder = WordEmbedder(\n            vocab_size=self._raw_memory_dim,\n            hparams=embed_fn_hparams[\"embedding\"]\n        )\n        # temporal embedder\n        temporal_embedder = PositionEmbedder(\n            position_size=memory_size,\n            hparams=embed_fn_hparams[\"temporal_embedding\"]\n        )\n\n        combine = embed_fn_hparams['combine_mode']\n        if combine == 'add':\n            if embedder.dim != temporal_embedder.dim:\n                raise ValueError('`embedding` and `temporal_embedding` must '\n                                 'have the same dimension for \"add\" '\n                                 'combination.')\n            memory_dim = embedder.dim\n        elif combine == 'concat':\n            memory_dim = embedder.dim + temporal_embedder.dim\n\n        def _embed_fn(memory, soft_memory, mode=None):\n            if memory is None and soft_memory is None:\n                raise ValueError(\n                    \"Either `memory` or `soft_memory` is required.\")\n            if memory is not None and soft_memory is not None:\n                raise ValueError(\n                    \"Must not specify `memory` and 
`soft_memory` at the \"\n                    \"same time.\")\n\n            embedded_memory = embedder(\n                ids=memory, soft_ids=soft_memory, mode=mode)\n            temporal_embedded = temporal_embedder(\n                sequence_length=tf.constant([memory_size]), mode=mode)\n            temporal_embedded = tf.tile(\n                temporal_embedded, [tf.shape(embedded_memory)[0], 1, 1])\n\n            if combine == 'add':\n                return tf.add(embedded_memory, temporal_embedded)\n            elif combine == 'concat':\n                return tf.concat([embedded_memory, temporal_embedded], axis=-1)\n            else:\n                raise ValueError('Unknown combine method: {}'.format(combine))\n\n        return _embed_fn, memory_dim\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"n_hops\": 1,\n                \"memory_dim\": 100,\n                \"relu_dim\": 50,\n                \"memory_size\": 100,\n                \"A\": default_embed_fn_hparams,\n                \"C\": default_embed_fn_hparams,\n                \"B\": default_embed_fn_hparams,\n                \"use_B\": False,\n                \"use_H\": False,\n                \"dropout_rate\": 0,\n                \"variational\": False,\n                \"name\": \"memnet\",\n            }\n\n        Here:\n\n        \"n_hops\" : int\n            Number of hops.\n\n        \"memory_dim\" : int\n            Memory dimension, i.e., the dimension size of a memory entry\n            embedding. Ignored if at least one of the embedding functions is\n            created according to :attr:`hparams`. 
In this case\n            :attr:`memory_dim` is inferred from the created embed_fn.\n\n        \"relu_dim\" : int\n            Number of elements in :attr:`memory_dim` that have relu at the end\n            of each hop.\n            Should be not less than 0 and not more than :attr:`memory_dim`.\n\n        \"memory_size\" : int\n            Number of entries in memory.\n\n            For example, the number of sentences {x_i} in Fig.1(a) of\n            (Sukhbaatar et al.) End-To-End Memory Networks.\n\n        \"use_B\" : bool\n            Whether to create the query embedding function. Ignored if\n            `query_embed_fn` is given to the constructor.\n\n        \"use_H\" : bool\n            Whether to perform a linear transformation with matrix `H` at\n            the end of each A-C layer.\n\n        \"dropout_rate\" : float\n            The dropout rate to apply to the output of each hop. Should\n            be between 0 and 1.\n            E.g., `dropout_rate=0.1` would drop out 10% of the units.\n\n        \"variational\" : bool\n            Whether to share dropout masks after each hop.\n        \"\"\"\n        return {\n            \"n_hops\": 1,\n            \"memory_dim\": 100,\n            \"relu_dim\": 50,\n            \"memory_size\": 100,\n            \"A\": default_memnet_embed_fn_hparams(),\n            \"C\": default_memnet_embed_fn_hparams(),\n            \"B\": default_memnet_embed_fn_hparams(),\n            \"use_B\": False,\n            \"use_H\": False,\n            \"dropout_rate\": 0,\n            \"variational\": False,\n            \"name\": \"memnet\",\n        }\n\n    def _build(self, memory, query, **kwargs):\n        raise NotImplementedError\n\n    @property\n    def memory_size(self):\n        \"\"\"The memory size.\n        \"\"\"\n        return self._memory_size\n\n    @property\n    def raw_memory_dim(self):\n        \"\"\"The dimension of memory element (or vocabulary size).\n        \"\"\"\n        return 
self._raw_memory_dim\n\n    @property\n    def memory_dim(self):\n        \"\"\"The dimension of embedded memory and all vectors in hops.\n        \"\"\"\n        return self._memory_dim\n\n\nclass MemNetRNNLike(MemNetBase):\n    \"\"\"An implementation of multi-layer end-to-end memory network,\n    with RNN-like weight tying described in\n    (Sukhbaatar et al.) End-To-End Memory Networks .\n\n    See :meth:`~texar.modules.MemNetBase.get_default_embed_fn` for default\n    embedding functions. Customized embedding functions must follow\n    the same signature.\n\n    Args:\n        raw_memory_dim (int): Dimension size of raw memory entries\n            (before embedding). For example,\n            if a raw memory entry is a word, this is the **vocabulary size**\n            (imagine a one-hot representation of word). If a raw memory entry\n            is a dense vector, this is the dimension size of the vector.\n        input_embed_fn (optional): A callable that embeds raw memory entries\n            as inputs.\n            This corresponds to the `A` embedding operation in\n            (Sukhbaatar et al.)\n            If not provided, a default embedding operation is created as\n            specified in :attr:`hparams`. See\n            :meth:`~texar.modules.MemNetBase.get_default_embed_fn`\n            for details.\n        output_embed_fn (optional): A callable that embeds raw memory entries\n            as outputs.\n            This corresponds to the `C` embedding operation in\n            (Sukhbaatar et al.)\n            If not provided, a default embedding operation is created as\n            specified in :attr:`hparams`. See\n            :meth:`~texar.modules.MemNetBase.get_default_embed_fn`\n            for details.\n        query_embed_fn (optional): A callable that embeds query.\n            This corresponds to the `B` embedding operation in\n            (Sukhbaatar et al.). 
If not provided and \"use_B\" is True\n            in :attr:`hparams`, a default embedding operation is created as\n            specified in :attr:`hparams`. See\n            :meth:`~texar.modules.MemNetBase.get_default_embed_fn`\n            for details.\n            For customized query_embed_fn, note that the function must follow\n            the signature of the default embed_fn where `memory_size` must\n            be 1.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n    \"\"\"\n\n    def __init__(self,\n                 raw_memory_dim,\n                 input_embed_fn=None,\n                 output_embed_fn=None,\n                 query_embed_fn=None,\n                 hparams=None):\n        MemNetBase.__init__(self, raw_memory_dim, input_embed_fn,\n                            output_embed_fn, query_embed_fn, hparams)\n\n        with tf.variable_scope(self.variable_scope):\n            self._AC = MemNetSingleLayer(\n                self.H,\n                hparams={\"name\": \"AC\"})\n\n            self._W = tf.layers.Dense(\n                units=raw_memory_dim,\n                use_bias=False,\n                name=\"W\")\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                \"n_hops\": 1,\n                \"memory_dim\": 100,\n                \"relu_dim\": 50,\n                \"memory_size\": 100,\n                \"A\": default_embed_fn_hparams,\n                \"C\": default_embed_fn_hparams,\n                \"B\": default_embed_fn_hparams,\n                \"use_B\": False,\n                \"use_H\": True,\n                \"dropout_rate\": 0,\n                \"variational\": False,\n                \"name\": \"memnet_rnnlike\",\n            }\n\n        Here:\n\n        \"n_hops\" : int\n            Number of hops.\n\n        \"memory_dim\" : int\n            Memory dimension, i.e., the dimension size of a memory entry\n            embedding. Ignored if at least one of the embedding functions is\n            created according to :attr:`hparams`. In this case\n            :attr:`memory_dim` is inferred from the created embed_fn.\n\n        \"relu_dim\" : int\n            Number of elements in :attr:`memory_dim` that have relu at the end\n            of each hop.\n            Should be not less than 0 and not more than :attr`memory_dim`.\n\n        \"memory_size\" : int\n            Number of entries in memory.\n\n            For example, the number of sentences {x_i} in Fig.1(a) of\n            (Sukhbaatar et al.) End-To-End Memory Networks.\n\n        \"use_B\" : bool\n            Whether to create the query embedding function. Ignored if\n            `query_embed_fn` is given to the constructor.\n\n        \"use_H\" : bool\n            Whether to perform a linear transformation with matrix `H` at\n            the end of each A-C layer.\n\n        \"dropout_rate\" : float\n            The dropout rate to apply to the output of each hop. 
Should\n            be between 0 and 1.\n            E.g., `dropout_rate=0.1` would drop out 10% of the units.\n\n        \"variational\" : bool\n            Whether to share dropout masks after each hop.\n        \"\"\"\n        hparams = MemNetBase.default_hparams()\n        hparams.update({\n            \"use_H\": True,\n            \"name\": \"memnet_rnnlike\"\n        })\n        return hparams\n\n    def _build(self, memory=None, query=None, soft_memory=None, soft_query=None,\n               mode=None, **kwargs):\n        \"\"\"Pass the :attr:`memory` and :attr:`query` through the memory network\n        and return the :attr:`logits` after the final matrix.\n\n        Only one of :attr:`memory` and :attr:`soft_memory` can be specified.\n        They should not be specified at the same time.\n\n        Args:\n            memory (optional): Memory used in A/C operations. By default, it\n                should be an integer tensor of shape\n                `[batch_size, memory_size]`,\n                containing the ids to embed if provided.\n            query (optional): Query vectors as the intial input of the memory\n                network.\n                If you'd like to apply some transformation (e.g., embedding)\n                on it before it's fed into the network, please set `use_B` to\n                True and add `query_embed_fn` when constructing this instance.\n                If `query_embed_fn` is set to\n                :meth:`~texar.modules.MemNetBase.get_default_embed_fn`,\n                it should be of shape `[batch_size]`.\n                If `use_B` is not set, it should be of shape\n                `[batch_size, memory_dim]`.\n            soft_memory (optional): Soft memory used in A/C operations. 
By\n                default, it should be a tensor of shape\n                `[batch_size, memory_size, raw_memory_dim]`,\n                containing the weights used to mix the embedding vectors.\n                If you'd like to apply a matrix multiplication on the memory,\n                this option can also be used.\n            soft_query (optional): Query vectors as the intial input of the\n                memory network.\n                If you'd like to apply some transformation (e.g., embedding)\n                on it before it's fed into the network, please set `use_B` to\n                True and add `query_embed_fn` when constructing this instance.\n                Similar to :attr:`soft_memory`, if `query_embed_fn` is set to\n                :meth:`~texar.modules.MemNetBase.get_default_embed_fn`,\n                then it must be of shape `[batch_size, raw_memory_dim]`.\n                Ignored if `use_B` is not set.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL`, and `PREDICT`. 
If `None`, dropout is\n                controlled by :func:`texar.global_mode`.\n        \"\"\"\n        if self._B is not None:\n            def _unsqueeze(x):\n                return x if x is None else tf.expand_dims(x, 1)\n            query = tf.squeeze(\n                self._B(_unsqueeze(query), _unsqueeze(soft_query), mode=mode),\n                1)\n        self._u = [query]\n        self._m = self._A(memory, soft_memory, mode=mode)\n        self._c = self._C(memory, soft_memory, mode=mode)\n\n        keep_prob = switch_dropout(1-self.hparams.dropout_rate, mode=mode)\n        if self.hparams.variational:\n            with tf.variable_scope(\"variational_dropout\"):\n                noise = tf.random_uniform(tf.shape(self._u[-1]))\n                random_tensor = keep_prob + noise\n                binary_tensor = tf.floor(random_tensor)\n            def _variational_dropout(val):\n                return tf.div(val, keep_prob) * binary_tensor\n\n        for _ in range(self._n_hops):\n            u_ = self._AC(self._u[-1], self._m, self._c)\n            if self._relu_dim == 0:\n                pass\n            elif self._relu_dim == self._memory_dim:\n                u_ = tf.nn.relu(u_)\n            elif 0 < self._relu_dim < self._memory_dim:\n                linear_part = u_[:, : self._memory_dim - self._relu_dim]\n                relu_part = u_[:, self._memory_dim - self._relu_dim :]\n                relued_part = tf.nn.relu(relu_part)\n                u_ = tf.concat(axis=1, values=[linear_part, relued_part])\n            else:\n                raise ValueError(\n                    \"relu_dim = {} is illegal\".format(self._relu_dim))\n            if self.hparams.variational:\n                u_ = _variational_dropout(u_)\n            else:\n                u_ = tf.nn.dropout(u_, keep_prob)\n            self._u.append(u_)\n\n        logits = self._W(self._u[-1])\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n           
 self._built = True\n\n        return logits\n"
  },
  {
    "path": "texar_repo/texar/modules/memory/memory_network_test.py",
    "content": "\"\"\"\nUnit tests for memory networks.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.modules.memory.memory_network import MemNetRNNLike\nfrom texar import context\n\n# pylint: disable=no-member, too-many-locals, too-many-instance-attributes\n# pylint: disable=too-many-arguments, protected-access\n\nclass MemNetRNNLikeTest(tf.test.TestCase):\n    \"\"\"Tests :class:`~texar.modules.memory.memory_network.MemNetRNNLike`.\n    \"\"\"\n\n    def _test_memory_dim(self, combine_mode='add', soft_memory=False,\n                         soft_query=False, use_B=False):\n        \"\"\"Tests :attr:`memory_dim` in the :attr:`combine_mode` and soft\n        options.\n        \"\"\"\n        print('testing: combine_mode={}, soft_memory={}, soft_query={}, '\n              'use_B={}'.format(combine_mode, soft_memory, soft_query, use_B))\n\n        n_hops = 3\n        if combine_mode == 'add' or combine_mode is None:\n            memory_dim = 19\n            embedding_dim = memory_dim\n            temporal_embedding_dim = memory_dim\n        elif combine_mode == 'concat':\n            embedding_dim = 19\n            temporal_embedding_dim = 17\n            memory_dim = embedding_dim + temporal_embedding_dim\n        else:\n            raise ValueError(\n                \"combine_mode = {} is not recognized\".format(combine_mode))\n        relu_dim = 13\n        memory_size = 7\n        raw_memory_dim = 11\n        batch_size = 2\n        embed_hparams = {\n            \"embedding\": {\n                \"dim\": embedding_dim,\n            },\n            \"temporal_embedding\": {\n                \"dim\": temporal_embedding_dim,\n            },\n            \"combine_mode\": combine_mode,\n        }\n        memnet_hparams = {\n            \"n_hops\": n_hops,\n            \"relu_dim\": relu_dim,\n            \"memory_size\": memory_size,\n            
\"A\": embed_hparams,\n            \"C\": embed_hparams,\n            \"B\": embed_hparams,\n            \"use_B\": use_B,\n        }\n        \n        memnet = MemNetRNNLike(raw_memory_dim=raw_memory_dim,\n                               hparams=memnet_hparams)\n        kwargs = {}\n        if soft_memory:\n            kwargs['soft_memory'] = tf.random_uniform(\n                [batch_size, memory_size, raw_memory_dim])\n        else:\n            kwargs['memory'] = tf.tile(tf.expand_dims(\n                tf.range(memory_size, dtype=tf.int32), 0), [batch_size, 1])\n        if use_B:\n            if soft_query:\n                kwargs['soft_query'] = tf.random_uniform(\n                    [batch_size, raw_memory_dim])\n            else:\n                kwargs['query'] = tf.random_uniform(\n                    [batch_size], maxval=raw_memory_dim, dtype=tf.int32)\n        else:\n            kwargs['query'] = tf.random_uniform([batch_size, memory_dim])\n        logits = memnet(**kwargs)\n        self.assertEqual(memnet.memory_dim, memory_dim)\n        self.assertEqual(logits.shape[0], batch_size)\n        self.assertEqual(logits.shape[1], raw_memory_dim)\n\n    def test_memory_dim(self):\n        \"\"\"Tests :attr:`memory_dim` in different :attr:`combine_mode` and\n        different soft options.\n        \"\"\"\n        for combine_mode in ['add', 'concat']:\n            for soft_memory in [False, True]:\n                for use_B in [False, True]:\n                    for soft_query in ([False, True] if use_B else [False]):\n                        self._test_memory_dim(combine_mode, soft_memory,\n                                              soft_query, use_B)\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/networks/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of networks.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.modules.networks.network_base import *\nfrom texar.modules.networks.networks import *\nfrom texar.modules.networks.conv_networks import *\n"
  },
  {
    "path": "texar_repo/texar/modules/networks/conv_networks.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious convolutional networks.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.modules.networks.network_base import FeedForwardNetworkBase\nfrom texar.modules.networks.network_base import _build_layers\nfrom texar.core.layers import get_pooling_layer_hparams, get_activation_fn\nfrom texar.utils.utils import uniquify_str\nfrom texar.utils.shapes import mask_sequences\nfrom texar.hyperparams import HParams\n\n# pylint: disable=too-many-arguments, too-many-locals\n\n__all__ = [\n    \"_to_list\",\n    \"Conv1DNetwork\"\n]\n\ndef _to_list(value, name=None, list_length=None):\n    \"\"\"Converts hparam value into a list.\n\n    If :attr:`list_length` is given,\n    then the canonicalized :attr:`value` must be of\n    length :attr:`list_length`.\n    \"\"\"\n    if not isinstance(value, (list, tuple)):\n        if list_length is not None:\n            value = [value] * list_length\n        else:\n            value = [value]\n    if list_length is not None and len(value) != list_length:\n        name = '' if name is None else name\n        raise ValueError(\"hparams '%s' must be a list of length %d\"\n                         % (name, list_length))\n    return value\n\nclass Conv1DNetwork(FeedForwardNetworkBase):\n    
\"\"\"Simple Conv-1D network which consists of a sequence of conv layers\n    followed with a sequence of dense layers.\n\n    Args:\n        hparams (dict, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    See :meth:`_build` for the inputs and outputs. The inputs must be a\n    3D Tensor of shape `[batch_size, length, channels]` (default), or\n    `[batch_size, channels, length]` (if `data_format` is set to\n    `'channels_last'` through :attr:`hparams`). For example, for sequence\n    classification, `length` corresponds to time steps, and `channels`\n    corresponds to embedding dim.\n\n    Example:\n\n        .. code-block:: python\n\n            nn = Conv1DNetwork() # Use the default structure\n\n            inputs = tf.random_uniform([64, 20, 256])\n            outputs = nn(inputs)\n            # outputs == Tensor of shape [64, 128], cuz the final dense layer\n            # has size 128.\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n\n    def __init__(self, hparams=None):\n        FeedForwardNetworkBase.__init__(self, hparams)\n\n        with tf.variable_scope(self.variable_scope):\n            layer_hparams = self._build_layer_hparams()\n            _build_layers(self, layers=None, layer_hparams=layer_hparams)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                # (1) Conv layers\n                \"num_conv_layers\": 1,\n                \"filters\": 128,\n                \"kernel_size\": [3, 4, 5],\n                \"conv_activation\": \"relu\",\n                \"conv_activation_kwargs\": None,\n                \"other_conv_kwargs\": None,\n                # (2) Pooling layers\n                \"pooling\": \"MaxPooling1D\",\n                \"pool_size\": None,\n                \"pool_strides\": 1,\n                \"other_pool_kwargs\": None,\n                # (3) Dense layers\n                \"num_dense_layers\": 1,\n                \"dense_size\": 128,\n                \"dense_activation\": \"identity\",\n                \"dense_activation_kwargs\": None,\n                \"final_dense_activation\": None,\n                \"final_dense_activation_kwargs\": None,\n                \"other_dense_kwargs\": None,\n                # (4) Dropout\n                \"dropout_conv\": [1],\n                \"dropout_dense\": [],\n                \"dropout_rate\": 0.75,\n                # (5) Others\n                \"name\": \"conv1d_network\",\n            }\n\n        Here:\n\n        1. For **convolutional** layers:\n\n            \"num_conv_layers\" : int\n                Number of convolutional layers.\n\n            \"filters\" : int or list\n                The number of filters in the convolution, i.e., the\n                dimensionality\n                of the output space. If \"num_conv_layers\" > 1, \"filters\" must be\n                a list of \"num_conv_layers\" integers.\n\n            \"kernel_size\" : int or list\n                Lengths of 1D convolution windows.\n\n                - If \"num_conv_layers\" == 1, this can be a list of arbitrary \\\n                number\\\n                of `int` denoting different sized conv windows. The number of \\\n                filters of each size is specified by \"filters\". 
For example,\\\n                the default values will create 3 sets of filters, each of which\\\n                has kernel size of 3, 4, and 5, respectively, and has filter\\\n                number 128.\n                - If \"num_conv_layers\" > 1, this must be a list of length \\\n                \"num_conv_layers\". Each element can be an `int` or a list \\\n                of arbitrary number of `int` denoting the kernel size of \\\n                respective layer.\n\n            \"conv_activation\": str or callable\n                Activation function applied to the output of the convolutional\n                layers. Set to \"indentity\" to maintain a linear activation.\n                See :func:`~texar.core.get_activation_fn` for more details.\n\n            \"conv_activation_kwargs\" : dict, optional\n                Keyword arguments for conv layer activation functions.\n                See :func:`~texar.core.get_activation_fn` for more details.\n\n            \"other_conv_kwargs\" : dict, optional\n                Other keyword arguments for\n                :tf_main:`tf.layers.Conv1D <layers/Conv1d>` constructor, e.g.,\n                \"data_format\", \"padding\", etc.\n\n        2. For **pooling** layers:\n\n            \"pooling\" : str or class or instance\n                Pooling layer after each of the convolutional layer(s). Can\n                a pooling layer class, its name or module path, or a class\n                instance.\n\n            \"pool_size\" : int or list, optional\n                Size of the pooling window. If an `int`, all pooling layer\n                will have the same pool size. If a list, the list length must\n                equal \"num_conv_layers\". If `None` and the pooling type\n                is either\n                :tf_main:`MaxPooling <layers/MaxPooling1D>` or\n                :tf_main:`AveragePooling <layers/AveragePooling1D>`, the\n                pool size will be set to input size. 
That is, the output of\n                the pooling layer is a single unit.\n\n            \"pool_strides\" : int or list, optional\n                Strides of the pooling operation. If an `int`, all pooling layer\n                will have the same stride. If a list, the list length must\n                equal \"num_conv_layers\".\n\n            \"other_pool_kwargs\" : dict, optional\n                Other keyword arguments for pooling layer class constructor.\n\n        3. For **dense** layers (note that here dense layers always follow conv\n           and pooling layers):\n\n            \"num_dense_layers\" : int\n                Number of dense layers.\n\n            \"dense_size\" : int or list\n                Number of units of each dense layers. If an `int`, all dense\n                layers will have the same size. If a list of `int`, the list\n                length must equal \"num_dense_layers\".\n\n            \"dense_activation\" : str or callable\n                Activation function applied to the output of the dense\n                layers **except** the last dense layer output . Set to\n                \"indentity\" to maintain a linear activation.\n                See :func:`~texar.core.get_activation_fn` for more details.\n\n            \"dense_activation_kwargs\" : dict, optional\n                Keyword arguments for dense layer activation functions before\n                the last dense layer.\n                See :func:`~texar.core.get_activation_fn` for more details.\n\n            \"final_dense_activation\" : str or callable\n                Activation function applied to the output of the **last** dense\n                layer. 
Set to `None` or\n                \"indentity\" to maintain a linear activation.\n                See :func:`~texar.core.get_activation_fn` for more details.\n\n            \"final_dense_activation_kwargs\" : dict, optional\n                Keyword arguments for the activation function of last\n                dense layer.\n                See :func:`~texar.core.get_activation_fn` for more details.\n\n            \"other_dense_kwargs\" : dict, optional\n                Other keyword arguments for\n                :tf_main:`Dense <layers/Dense>`\n                layer class constructor.\n\n        4. For **dropouts**:\n\n            \"dropout_conv\" : int or list\n                The indexes of conv layers (starting from `0`) whose **inputs**\n                are applied with dropout. The index = :attr:`num_conv_layers`\n                means dropout applies to the final conv layer output. E.g.,\n\n                .. code-block:: python\n\n                    {\n                        \"num_conv_layers\": 2,\n                        \"dropout_conv\": [0, 2]\n                    }\n\n                will leads to a series of layers as\n                `-dropout-conv0-conv1-dropout-`.\n\n                The dropout mode (training or not) is controlled\n                by the :attr:`mode` argument of :meth:`_build`.\n\n            \"dropout_dense\" : int or list\n                Same as \"dropout_conv\" but applied to dense layers (index\n                starting from `0`).\n\n            \"dropout_rate\" : float\n                The dropout rate, between 0 and 1. E.g.,\n                `\"dropout_rate\": 0.1` would drop out 10% of elements.\n\n        5. 
Others:\n\n            \"name\" : str\n                Name of the network.\n        \"\"\"\n        return {\n            # Conv layers\n            \"num_conv_layers\": 1,\n            \"filters\": 128,\n            \"kernel_size\": [3, 4, 5],\n            \"conv_activation\": \"relu\",\n            \"conv_activation_kwargs\": None,\n            \"other_conv_kwargs\": None,\n            # Pooling layers\n            \"pooling\": \"MaxPooling1D\",\n            \"pool_size\": None,\n            \"pool_strides\": 1,\n            \"other_pool_kwargs\": None,\n            # Dense layers\n            \"num_dense_layers\": 1,\n            \"dense_size\": 128,\n            \"dense_activation\": \"identity\",\n            \"dense_activation_kwargs\": None,\n            \"final_dense_activation\": None,\n            \"final_dense_activation_kwargs\": None,\n            \"other_dense_kwargs\": None,\n            # Dropout\n            \"dropout_conv\": [1],\n            \"dropout_dense\": [],\n            \"dropout_rate\": 0.75,\n            # Others\n            \"name\": \"conv1d_network\",\n            \"@no_typecheck\": [\"filters\", \"kernel_size\", \"conv_activation\",\n                              \"pool_size\", \"pool_strides\",\n                              \"dense_size\", \"dense_activation\",\n                              \"dropout_conv\", \"dropout_dense\"]\n        }\n\n    def _build_pool_hparams(self):\n        pool_type = self._hparams.pooling\n        if pool_type == \"MaxPooling\":\n            pool_type = \"MaxPooling1D\"\n        elif pool_type == \"AveragePooling\":\n            pool_type = \"AveragePooling1D\"\n\n        npool = self._hparams.num_conv_layers\n        pool_size = _to_list(self._hparams.pool_size, \"pool_size\", npool)\n        strides = _to_list(self._hparams.pool_strides, \"pool_strides\", npool)\n\n        other_kwargs = self._hparams.other_pool_kwargs or {}\n        if isinstance(other_kwargs, HParams):\n            other_kwargs = 
other_kwargs.todict()\n        if not isinstance(other_kwargs, dict):\n            raise ValueError(\"hparams['other_pool_kwargs'] must be a dict.\")\n\n        pool_hparams = []\n        for i in range(npool):\n            kwargs_i = {\"pool_size\": pool_size[i], \"strides\": strides[i],\n                        \"name\": \"pool_%d\" % (i+1)}\n            kwargs_i.update(other_kwargs)\n            pool_hparams_ = get_pooling_layer_hparams({\"type\": pool_type,\n                                                       \"kwargs\": kwargs_i})\n            pool_hparams.append(pool_hparams_)\n\n        return pool_hparams\n\n    def _build_conv1d_hparams(self, pool_hparams):\n        \"\"\"Creates the hparams for each of the conv layers usable for\n        :func:`texar.core.layers.get_layer`.\n        \"\"\"\n        nconv = self._hparams.num_conv_layers\n        if len(pool_hparams) != nconv:\n            raise ValueError(\"`pool_hparams` must be of length %d\" % nconv)\n\n        filters = _to_list(self._hparams.filters, 'filters', nconv)\n\n        if nconv == 1:\n            kernel_size = _to_list(self._hparams.kernel_size)\n            if not isinstance(kernel_size[0], (list, tuple)):\n                kernel_size = [kernel_size]\n        elif nconv > 1:\n            kernel_size = _to_list(self._hparams.kernel_size,\n                                   'kernel_size', nconv)\n            kernel_size = [_to_list(ks) for ks in kernel_size]\n\n        other_kwargs = self._hparams.other_conv_kwargs or {}\n        if isinstance(other_kwargs, HParams):\n            other_kwargs = other_kwargs.todict()\n        if not isinstance(other_kwargs, dict):\n            raise ValueError(\"hparams['other_conv_kwargs'] must be a dict.\")\n\n        conv_pool_hparams = []\n        activation_fn = get_activation_fn(\n            self._hparams.conv_activation,\n            self._hparams.conv_activation_kwargs)\n        for i in range(nconv):\n            hparams_i = []\n            names 
= []\n            for ks_ij in kernel_size[i]:\n                name = uniquify_str(\"conv_%d\" % (i+1), names)\n                names.append(name)\n                conv_kwargs_ij = {\n                    \"filters\": filters[i],\n                    \"kernel_size\": ks_ij,\n                    \"activation\": activation_fn,\n                    \"name\": name\n                }\n                conv_kwargs_ij.update(other_kwargs)\n                hparams_i.append(\n                    {\"type\": \"Conv1D\", \"kwargs\": conv_kwargs_ij})\n            if len(hparams_i) == 1:\n                conv_pool_hparams.append([hparams_i[0], pool_hparams[i]])\n            else:  # creates MergeLayer\n                mrg_kwargs_layers = []\n                for hparams_ij in hparams_i:\n                    seq_kwargs_j = {\"layers\": [hparams_ij, pool_hparams[i]]}\n                    mrg_kwargs_layers.append(\n                        {\"type\": \"SequentialLayer\", \"kwargs\": seq_kwargs_j})\n                mrg_hparams = {\"type\": \"MergeLayer\",\n                               \"kwargs\": {\"layers\": mrg_kwargs_layers,\n                                          \"name\": \"conv_pool_%d\" % (i+1)}}\n                conv_pool_hparams.append(mrg_hparams)\n\n        return conv_pool_hparams\n\n    def _build_dense_hparams(self):\n        ndense = self._hparams.num_dense_layers\n        dense_size = _to_list(self._hparams.dense_size, 'dense_size', ndense)\n\n        other_kwargs = self._hparams.other_dense_kwargs or {}\n        if isinstance(other_kwargs, HParams):\n            other_kwargs = other_kwargs.todict()\n        if not isinstance(other_kwargs, dict):\n            raise ValueError(\"hparams['other_dense_kwargs'] must be a dict.\")\n\n        dense_hparams = []\n        activation_fn = get_activation_fn(\n            self._hparams.dense_activation,\n            self._hparams.dense_activation_kwargs)\n        for i in range(ndense):\n            if i == ndense - 1:\n      
          activation_fn = get_activation_fn(\n                    self._hparams.final_dense_activation,\n                    self._hparams.final_dense_activation_kwargs)\n\n            kwargs_i = {\"units\": dense_size[i],\n                        \"activation\": activation_fn,\n                        \"name\": \"dense_%d\" % (i+1)}\n            kwargs_i.update(other_kwargs)\n\n            dense_hparams.append({\"type\": \"Dense\", \"kwargs\": kwargs_i})\n\n        return dense_hparams\n\n    def _build_layer_hparams(self):\n        pool_hparams = self._build_pool_hparams()\n        conv_pool_hparams = self._build_conv1d_hparams(pool_hparams)\n        dense_hparams = self._build_dense_hparams()\n\n        def _dropout_hparams(layer_id):\n            return {\"type\": \"Dropout\",\n                    \"kwargs\": {\"rate\": self._hparams.dropout_rate,\n                               \"name\": \"dropout_%d\" % layer_id}}\n        dropout_conv = _to_list(self._hparams.dropout_conv)\n        dropout_dense = _to_list(self._hparams.dropout_dense)\n\n        layers_hparams = []\n        nconv = self._hparams.num_conv_layers\n        for conv_i in range(nconv):\n            if conv_i in dropout_conv:\n                layers_hparams.append(_dropout_hparams(conv_i))\n            if isinstance(conv_pool_hparams[conv_i], (list, tuple)):\n                layers_hparams += conv_pool_hparams[conv_i]\n            else:\n                layers_hparams.append(conv_pool_hparams[conv_i])\n        if nconv in dropout_conv:\n            layers_hparams.append(_dropout_hparams(nconv))\n\n        ndense = self._hparams.num_dense_layers\n        if ndense > 0: # Add flatten layers before dense layers\n            layers_hparams.append({\"type\": \"Flatten\"})\n        for dense_i in range(ndense):\n            if dense_i in dropout_dense:\n                layers_hparams.append(_dropout_hparams(dense_i + nconv))\n            layers_hparams.append(dense_hparams[dense_i])\n        if ndense 
in dropout_dense:\n            layers_hparams.append(_dropout_hparams(ndense + nconv))\n\n        return layers_hparams\n\n    def _build(self,    # pylint: disable=arguments-differ\n               inputs,\n               sequence_length=None,\n               dtype=None,\n               mode=None):\n        \"\"\"Feeds forward inputs through the network layers and returns outputs.\n\n        Args:\n            inputs: The inputs to the network, which is a 3D tensor.\n            sequence_length (optional): An int tensor of shape `[batch_size]`\n                containing the length of each element in :attr:`inputs`.\n                If given, time steps beyond the length will first be masked out\n                before feeding to the layers.\n            dtype (optional): Type of the inputs. If not provided, infers\n                from inputs automatically.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL`, and `PREDICT`. If `None`,\n                :func:`texar.global_mode` is used.\n\n        Returns:\n            The output of the final layer.\n        \"\"\"\n        if sequence_length is not None:\n            inputs = mask_sequences(\n                inputs, sequence_length, dtype=dtype, time_major=False,\n                tensor_rank=3)\n        return super(Conv1DNetwork, self)._build(inputs, mode=mode)\n\n"
  },
  {
    "path": "texar_repo/texar/modules/networks/conv_networks_test.py",
    "content": "#\n\"\"\"\nUnit tests for conv networks.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nimport texar as tx\nfrom texar.modules.networks.conv_networks import Conv1DNetwork\n\n\nclass Conv1DNetworkTest(tf.test.TestCase):\n    \"\"\"Tests :class:`~texar.modules.Conv1DNetwork` class.\n    \"\"\"\n\n    def test_feedforward(self):\n        \"\"\"Tests feed forward.\n        \"\"\"\n        network_1 = Conv1DNetwork()\n        self.assertEqual(len(network_1.layers), 4)\n        self.assertTrue(isinstance(network_1.layer_by_name(\"conv_pool_1\"),\n                                   tx.core.MergeLayer))\n        for layer in network_1.layers[0].layers:\n            self.assertTrue(isinstance(layer, tx.core.SequentialLayer))\n\n        inputs_1 = tf.ones([64, 16, 300], tf.float32)\n        outputs_1 = network_1(inputs_1)\n        self.assertEqual(outputs_1.shape, [64, 128])\n\n        hparams = {\n            # Conv layers\n            \"num_conv_layers\": 2,\n            \"filters\": 128,\n            \"kernel_size\": [[3, 4, 5], 4],\n            \"other_conv_kwargs\": {\"padding\": \"same\"},\n            # Pooling layers\n            \"pooling\": \"AveragePooling\",\n            \"pool_size\": 2,\n            \"pool_strides\": 1,\n            # Dense layers\n            \"num_dense_layers\": 3,\n            \"dense_size\": [128, 128, 10],\n            \"dense_activation\": \"relu\",\n            \"other_dense_kwargs\": {\"use_bias\": False},\n            # Dropout\n            \"dropout_conv\": [0, 1, 2],\n            \"dropout_dense\": 2\n        }\n        network_2 = Conv1DNetwork(hparams)\n        # nlayers = nconv-pool + nconv + npool + ndense + ndropout + flatten\n        self.assertEqual(len(network_2.layers), 1+1+1+3+4+1)\n        self.assertTrue(isinstance(network_2.layer_by_name(\"conv_pool_1\"),\n     
                              tx.core.MergeLayer))\n        for layer in network_2.layers[1].layers:\n            self.assertTrue(isinstance(layer, tx.core.SequentialLayer))\n\n        inputs_2 = tf.ones([64, 16, 300], tf.float32)\n        outputs_2 = network_2(inputs_2)\n        self.assertEqual(outputs_2.shape, [64, 10])\n\n    def test_unknown_seq_length(self):\n        \"\"\"Tests use of pooling layer when the seq_length dimension of inputs\n        is `None`.\n        \"\"\"\n        network_1 = Conv1DNetwork()\n        inputs_1 = tf.placeholder(tf.float32, [64, None, 300])\n        outputs_1 = network_1(inputs_1)\n        self.assertEqual(outputs_1.shape, [64, 128])\n\n        hparams = {\n            # Conv layers\n            \"num_conv_layers\": 2,\n            \"filters\": 128,\n            \"kernel_size\": [[3, 4, 5], 4],\n            # Pooling layers\n            \"pooling\": \"AveragePooling\",\n            \"pool_size\": [2, None],\n            # Dense layers\n            \"num_dense_layers\": 1,\n            \"dense_size\": 10,\n        }\n        network = Conv1DNetwork(hparams)\n        # nlayers = nconv-pool + nconv + npool + ndense + ndropout + flatten\n        self.assertEqual(len(network.layers), 1+1+1+1+1+1)\n        self.assertTrue(isinstance(network.layer_by_name('pool_2'),\n                                   tx.core.AverageReducePooling1D))\n\n        inputs = tf.placeholder(tf.float32, [64, None, 300])\n        outputs = network(inputs)\n        self.assertEqual(outputs.shape, [64, 10])\n\n        hparams_2 = {\n            # Conv layers\n            \"num_conv_layers\": 1,\n            \"filters\": 128,\n            \"kernel_size\": 4,\n            \"other_conv_kwargs\": {'data_format': 'channels_first'},\n            # Pooling layers\n            \"pooling\": \"MaxPooling\",\n            \"other_pool_kwargs\": {'data_format': 'channels_first'},\n            # Dense layers\n            \"num_dense_layers\": 1,\n            \"dense_size\": 
10,\n        }\n        network_2 = Conv1DNetwork(hparams_2)\n        inputs_2 = tf.placeholder(tf.float32, [64, 300, None])\n        outputs_2 = network_2(inputs_2)\n        self.assertEqual(outputs_2.shape, [64, 10])\n\n    def test_mask_input(self):\n        \"\"\"Tests masked inputs.\n        \"\"\"\n        network_1 = Conv1DNetwork()\n        inputs_1 = tf.ones([3, 16, 300], tf.float32)\n        seq_length = [10, 15, 1]\n        outputs_1 = network_1(inputs_1, sequence_length=seq_length)\n        self.assertEqual(outputs_1.shape, [3, 128])\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/networks/network_base.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nBase class for feed forward neural networks.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport tensorflow as tf\n\nfrom texar.module_base import ModuleBase\nfrom texar.utils import TexarError\nfrom texar.core.layers import get_layer\nfrom texar.utils.utils import uniquify_str\nfrom texar.utils.mode import is_train_mode\n\n# pylint: disable=too-many-instance-attributes, arguments-differ\n# pylint: disable=protected-access\n\n__all__ = [\n    \"_build_layers\",\n    \"FeedForwardNetworkBase\"\n]\n\ndef _build_layers(network, layers=None, layer_hparams=None):\n    \"\"\"Builds layers.\n\n    Either :attr:`layer_hparams` or :attr:`layers` must be\n    provided. 
If both are given, :attr:`layers` will be used.\n\n    Args:\n        network: An instance of a subclass of\n            :class:`~texar.modules.networks.network_base.FeedForwardNetworkBase`\n        layers (optional): A list of layer instances.\n        layer_hparams (optional): A list of layer hparams, each to which\n            is fed to :func:`~texar.core.layers.get_layer` to create the\n            layer instance.\n    \"\"\"\n    with tf.variable_scope(network.variable_scope):\n        if layers is not None:\n            network._layers = layers\n        else:\n            if layer_hparams is None:\n                raise ValueError(\n                    'Either `layer` or `layer_hparams` is required.')\n            network._layers = []\n            for _, hparams in enumerate(layer_hparams):\n                network._layers.append(get_layer(hparams=hparams))\n\n    for layer in network._layers:\n        layer_name = uniquify_str(layer.name, network._layer_names)\n        network._layer_names.append(layer_name)\n        network._layers_by_name[layer_name] = layer\n\nclass FeedForwardNetworkBase(ModuleBase):\n    \"\"\"Base class inherited by all feed-forward network classes.\n\n    Args:\n        hparams (dict, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    See :meth:`_build` for the inputs and outputs.\n    \"\"\"\n\n    def __init__(self, hparams=None):\n        ModuleBase.__init__(self, hparams)\n\n        self._layers = []\n        self._layer_names = []\n        self._layers_by_name = {}\n        self._layer_outputs = []\n        self._layer_outputs_by_name = {}\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                \"name\": \"NN\"\n            }\n        \"\"\"\n        return {\n            \"name\": \"NN\"\n        }\n\n    def _build(self, inputs, mode=None):\n        \"\"\"Feeds forward inputs through the network layers and returns outputs.\n\n        Args:\n            inputs: The inputs to the network. The requirements on inputs\n                depends on the first layer and subsequent layers in the\n                network.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL`, and `PREDICT`. If `None`,\n                :func:`texar.global_mode` is used.\n\n        Returns:\n            The output of the network.\n        \"\"\"\n        training = is_train_mode(mode)\n\n        prev_outputs = inputs\n        for layer_id, layer in enumerate(self._layers):\n            if isinstance(layer, tf.layers.Dropout) or \\\n                    isinstance(layer, tf.layers.BatchNormalization):\n                outputs = layer(prev_outputs, training=training)\n            else:\n                outputs = layer(prev_outputs)\n            self._layer_outputs.append(outputs)\n            self._layer_outputs_by_name[self._layer_names[layer_id]] = outputs\n            prev_outputs = outputs\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            # Add trainable variables of `self._layers` which may be\n            # constructed externally.\n            for layer in self._layers:\n                self._add_trainable_variable(layer.trainable_variables)\n            self._built = True\n\n        return outputs\n\n    def append_layer(self, layer):\n        \"\"\"Appends a layer to the end of the network. 
The method is only\n        feasible before :attr:`_build` is called.\n\n        Args:\n            layer: A :tf_main:`tf.layers.Layer <layers/Layer>` instance, or\n                a dict of layer hyperparameters.\n        \"\"\"\n        if self._built:\n            raise TexarError(\"`FeedForwardNetwork.append_layer` can be \"\n                             \"called only before `_build` is called.\")\n\n        with tf.variable_scope(self.variable_scope):\n            layer_ = layer\n            if not isinstance(layer_, tf.layers.Layer):\n                layer_ = get_layer(hparams=layer_)\n            self._layers.append(layer_)\n            layer_name = uniquify_str(layer_.name, self._layer_names)\n            self._layer_names.append(layer_name)\n            self._layers_by_name[layer_name] = layer_\n\n    def has_layer(self, layer_name):\n        \"\"\"Returns `True` if the network with the name exists. Returns `False`\n        otherwise.\n\n        Args:\n            layer_name (str): Name of the layer.\n        \"\"\"\n        return layer_name in self._layers_by_name\n\n    def layer_by_name(self, layer_name):\n        \"\"\"Returns the layer with the name. 
Returns 'None' if the layer name\n        does not exist.\n\n        Args:\n            layer_name (str): Name of the layer.\n        \"\"\"\n        return self._layers_by_name.get(layer_name, None)\n\n    @property\n    def layers_by_name(self):\n        \"\"\"A dictionary mapping layer names to the layers.\n        \"\"\"\n        return self._layers_by_name\n\n    @property\n    def layers(self):\n        \"\"\"A list of the layers.\n        \"\"\"\n        return self._layers\n\n    @property\n    def layer_names(self):\n        \"\"\"A list of uniquified layer names.\n        \"\"\"\n        return self._layer_names\n\n    def layer_outputs_by_name(self, layer_name):\n        \"\"\"Returns the output tensors of the layer with the specified name.\n        Returns `None` if the layer name does not exist.\n\n        Args:\n            layer_name (str): Name of the layer.\n        \"\"\"\n        return self._layer_outputs_by_name.get(layer_name, None)\n\n    @property\n    def layer_outputs(self):\n        \"\"\"A list containing output tensors of each layer.\n        \"\"\"\n        return self._layer_outputs\n"
  },
  {
    "path": "texar_repo/texar/modules/networks/networks.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nVarious neural networks and related utilities.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport tensorflow as tf\n\nfrom texar.modules.networks.network_base import FeedForwardNetworkBase\nfrom texar.modules.networks.network_base import _build_layers\n\n__all__ = [\n    \"FeedForwardNetwork\"\n]\n\nclass FeedForwardNetwork(FeedForwardNetworkBase):\n    \"\"\"Feed-forward neural network that consists of a sequence of layers.\n\n    Args:\n        layers (list, optional): A list of :tf_main:`Layer <layers/Layer>`\n            instances composing the network. If not given, layers are created\n            according to :attr:`hparams`.\n        hparams (dict, optional): Embedder hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    See :meth:`~texar.modules.RNNDecoderBase._build` of\n    :class:`~texar.modules.FeedForwardNetworkBase` for the inputs and outputs.\n\n    Example:\n\n        .. 
code-block:: python\n\n            hparams = { # Builds a two-layer dense NN\n                \"layers\": [\n                    { \"type\": \"Dense\", \"kwargs\": { \"units\": 256 } },\n                    { \"type\": \"Dense\", \"kwargs\": { \"units\": 10 } }\n                ]\n            }\n            nn = FeedForwardNetwork(hparams=hparams)\n\n            inputs = tf.random_uniform([64, 100])\n            outputs = nn(inputs)\n            # outputs == Tensor of shape [64, 10]\n    \"\"\"\n\n    def __init__(self, layers=None, hparams=None):\n        FeedForwardNetworkBase.__init__(self, hparams)\n\n        with tf.variable_scope(self.variable_scope):\n            _build_layers(\n                self, layers=layers, layer_hparams=self._hparams.layers)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. code-block:: python\n\n            {\n                \"layers\": [],\n                \"name\": \"NN\"\n            }\n\n        Here:\n\n        \"layers\" : list\n            A list of layer hyperparameters. See :func:`~texar.core.get_layer`\n            for the details of layer hyperparameters.\n\n        \"name\" : str\n            Name of the network.\n        \"\"\"\n        return {\n            \"layers\": [],\n            \"name\": \"NN\"\n        }\n\n"
  },
  {
    "path": "texar_repo/texar/modules/networks/networks_test.py",
    "content": "\"\"\"\nUnit tests for feed forward neural networks.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.modules.networks.networks import FeedForwardNetwork\n\n# pylint: disable=no-member, invalid-name\n\nclass FeedForwardNetworkTest(tf.test.TestCase):\n    \"\"\"Tests the class\n    :class:`~texar.modules.networks.networks.FeedForwardNetwork`.\n    \"\"\"\n\n    def test_feedforward(self):\n        \"\"\"Tests feed-forward.\n        \"\"\"\n        hparams = {\n            \"layers\": [\n                {\n                    \"type\": \"Dense\",\n                },\n                {\n                    \"type\": \"Dense\",\n                }\n            ]\n        }\n\n        nn = FeedForwardNetwork(hparams=hparams)\n\n        self.assertEqual(len(nn.layers), len(hparams[\"layers\"]))\n        _ = nn(tf.ones([64, 16, 16]))\n        self.assertEqual(len(nn.trainable_variables),\n                         len(hparams[\"layers\"]) * 2)\n        self.assertEqual(len(nn.layer_outputs), len(hparams[\"layers\"]))\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/policies/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of Texar policies.\n\"\"\"\n\n# pylint: disable=wildcard-import\n\nfrom texar.modules.policies.policy_nets import *\n"
  },
  {
    "path": "texar_repo/texar/modules/policies/policy_nets.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Policy models based on feed forward networks.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.module_base import ModuleBase\nfrom texar.agents.agent_utils import Space\nfrom texar.utils import utils\nfrom texar.utils.dtypes import get_tf_dtype\n\n# pylint: disable=no-member\n\n__all__ = [\n    'PolicyNetBase',\n    'CategoricalPolicyNet'\n]\n\nclass PolicyNetBase(ModuleBase):\n    \"\"\"Policy net that takes in states and outputs actions.\n\n    Args:\n        network (optional): A network that takes in state and returns\n            outputs for generating actions. For example, an instance of subclass\n            of :class:`~texar.modules.FeedForwardNetworkBase`. If `None`,\n            a network is created as specified in :attr:`hparams`.\n        network_kwargs (dict, optional): Keyword arguments for network\n            constructor.\n            Note that the `hparams` argument for network\n            constructor is specified in the \"network_hparams\" field of\n            :attr:`hparams` and should not be included in `network_kwargs`.\n            Ignored if :attr:`network` is given.\n        hparams (dict or HParams, optional): Hyperparameters. 
Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n    \"\"\"\n    def __init__(self,\n                 network=None,\n                 network_kwargs=None,\n                 hparams=None):\n        ModuleBase.__init__(self, hparams=hparams)\n\n        with tf.variable_scope(self.variable_scope):\n            self._build_network(network, network_kwargs)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. role:: python(code)\n           :language: python\n\n        .. code-block:: python\n\n            {\n                'network_type': 'FeedForwardNetwork',\n                'network_hparams': {\n                    'layers': [\n                        {\n                            'type': 'Dense',\n                            'kwargs': {'units': 256, 'activation': 'relu'}\n                        },\n                        {\n                            'type': 'Dense',\n                            'kwargs': {'units': 256, 'activation': 'relu'}\n                        },\n                    ]\n                },\n                'distribution_kwargs': None,\n                'name': 'policy_net',\n            }\n\n        Here:\n\n        \"network_type\" : str or class or instance\n            A network that takes in state and returns outputs for\n            generating actions. This can be a class, its name or module path,\n            or a class instance. Ignored if `network` is given to the\n            constructor.\n\n        \"network_hparams\" : dict\n            Hyperparameters for the network. 
With the :attr:`network_kwargs`\n            argument to the constructor, a network is created with\n            :python:`network_class(**network_kwargs, hparams=network_hparams)`.\n\n            For example, the default values creates a two-layer dense network.\n\n        \"distribution_kwargs\" : dict, optional\n            Keyword arguments for distribution constructor. A distribution\n            would be created for action sampling.\n\n        \"name\" : str\n            Name of the policy.\n        \"\"\"\n        return {\n            'network_type': 'FeedForwardNetwork',\n            'network_hparams': {\n                'layers': [\n                    {\n                        'type': 'Dense',\n                        'kwargs': {'units': 256, 'activation': 'relu'}\n                    },\n                    {\n                        'type': 'Dense',\n                        'kwargs': {'units': 256, 'activation': 'relu'}\n                    },\n                ]\n            },\n            'distribution_kwargs': None,\n            'name': 'policy_net',\n            '@no_typecheck': ['network_type', 'network_hparams']\n        }\n\n    def _build_network(self, network, kwargs):\n        if network is not None:\n            self._network = network\n        else:\n            kwargs = utils.get_instance_kwargs(\n                kwargs, self._hparams.network_hparams)\n            self._network = utils.check_or_get_instance(\n                self._hparams.network_type,\n                kwargs,\n                module_paths=['texar.modules', 'texar.custom'])\n\n    def _build(self, inputs, mode=None): # pylint: disable=arguments-differ\n        raise NotImplementedError\n\n    @property\n    def network(self):\n        \"\"\"The network.\n        \"\"\"\n        return self._network\n\n\n#TODO(zhiting): Allow structured discrete actions.\nclass CategoricalPolicyNet(PolicyNetBase):\n    \"\"\"Policy net with Categorical distribution for discrete scalar 
actions.\n\n    This is a combination of a network with a top-layer distribution for\n    action sampling.\n\n    Args:\n        action_space (optional): An instance of :class:`~texar.agents.Space`\n            specifying the action space. If not given, an discrete action space\n            `[0, high]` is created with `high` specified in :attr:`hparams`.\n        network (optional): A network that takes in state and returns\n            outputs for generating actions. For example, an instance of subclass\n            of :class:`~texar.modules.FeedForwardNetworkBase`. If `None`,\n            a network is created as specified in :attr:`hparams`.\n        network_kwargs (dict, optional): Keyword arguments for network\n            constructor.\n            Note that the `hparams` argument for network\n            constructor is specified in the \"network_hparams\" field of\n            :attr:`hparams` and should not be included in `network_kwargs`.\n            Ignored if :attr:`network` is given.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n\n    def __init__(self,\n                 action_space=None,\n                 network=None,\n                 network_kwargs=None,\n                 hparams=None):\n        PolicyNetBase.__init__(self, hparams=hparams)\n\n        with tf.variable_scope(self.variable_scope):\n            if action_space is None:\n                action_space = Space(\n                    low=0, high=self._hparams.action_space, dtype=np.int32)\n            self._action_space = action_space\n            self._append_output_layer()\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                'network_type': 'FeedForwardNetwork',\n                'network_hparams': {\n                    'layers': [\n                        {\n                            'type': 'Dense',\n                            'kwargs': {'units': 256, 'activation': 'relu'}\n                        },\n                        {\n                            'type': 'Dense',\n                            'kwargs': {'units': 256, 'activation': 'relu'}\n                        },\n                    ]\n                },\n                'distribution_kwargs': {\n                    'dtype': 'int32',\n                    'validate_args': False,\n                    'allow_nan_stats': True\n                },\n                'action_space': 2,\n                'make_output_layer': True,\n                'name': 'categorical_policy_net'\n            }\n\n        Here:\n\n        \"distribution_kwargs\" : dict\n            Keyword arguments for the :tf_main:`Categorical\n            <distributions/Categorical>` distribution constructor. Arguments\n            `logits` and `probs` should not be included as they are inferred\n            from the inputs. Argument `dtype` can be a string (e.g., `int32`)\n            and will be converted to a corresponding tf dtype.\n\n        \"action_space\" : int\n            Upper bound of the action space. The resulting action space is\n            all discrete scalar numbers between 0 and the upper bound specified\n            here (both inclusive).\n\n        \"make_output_layer\" : bool\n            Whether to append a dense layer to the network to transform\n            features to logits for action sampling. 
If `False`, the final layer\n            output of network must match the action space.\n\n        See :class:`~texar.modules.PolicyNetBase.default_hparams` for details\n        of other hyperparameters.\n        \"\"\"\n        hparams = PolicyNetBase.default_hparams()\n        hparams.update({\n            'distribution_kwargs': {\n                'dtype': 'int32',\n                'validate_args': False,\n                'allow_nan_stats': True\n            },\n            'action_space': 2,\n            'make_output_layer': True,\n            'name': 'categorical_policy_net'\n        })\n        return hparams\n\n    def _append_output_layer(self):\n        if not self._hparams.make_output_layer:\n            return\n\n        if self._action_space.shape != ():\n            raise ValueError('Only scalar discrete action is supported.')\n        else:\n            output_size = self._action_space.high - self._action_space.low\n\n        layer_hparams = {\n            'type': 'Dense',\n            'kwargs': {'units': output_size}\n        }\n        self._network.append_layer(layer_hparams)\n\n    def _build(self, inputs, mode=None):\n        \"\"\"Takes in states and outputs actions.\n\n        Args:\n            inputs: Inputs to the policy network with the first dimension\n                the batch dimension.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL`, and `PREDICT`. 
If `None`,\n                :func:`texar.global_mode` is used.\n\n        Returns:\n            A `dict` including fields `\"logits\"`, `\"action\"`, and `\"dist\"`,\n            where\n\n            - **\"logits\"**: A Tensor of shape \\\n            `[batch_size] + action_space size` used for categorical \\\n            distribution sampling.\n            - **\"action\"**: A Tensor of shape \\\n            `[batch_size] + action_space.shape`.\n            - **\"dist\"**: The \\\n            :tf_main:`Categorical <distributions/Categorical>` based on the \\\n            logits.\n        \"\"\"\n        logits = self._network(inputs, mode=mode)\n\n        dkwargs = self._hparams.distribution_kwargs.todict()\n        dkwargs['dtype'] = get_tf_dtype(dkwargs['dtype'])\n        dist = tf.distributions.Categorical(logits=logits, **dkwargs)\n\n        action = dist.sample()\n        to_shape = [-1] # for batch dimension\n        to_shape.extend(list(self._action_space.shape))\n        action = tf.reshape(action, to_shape)\n\n        outputs = {\n            \"logits\": logits,\n            \"action\": action,\n            \"dist\": dist\n        }\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            self._add_trainable_variable(self._network.trainable_variables)\n            self._built = True\n\n        return outputs\n\n    @property\n    def action_space(self):\n        \"\"\"An instance of :class:`~texar.agents.Space` specifying the\n        action space.\n        \"\"\"\n        return self._action_space\n"
  },
  {
    "path": "texar_repo/texar/modules/policies/policy_nets_test.py",
    "content": "#\n\"\"\"\nTests policy nets.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\nfrom texar.modules.policies.policy_nets import CategoricalPolicyNet\n\nclass CategoricalPolicyNetTest(tf.test.TestCase):\n    \"\"\"Tests :class:`texar.modules.CategoricalPolicyNet`.\n    \"\"\"\n\n    def test_categorical_policy(self):\n        \"\"\"Tests logics.\n        \"\"\"\n        policy = CategoricalPolicyNet()\n\n        inputs = tf.random_uniform(shape=[1, 4])\n        outputs = policy(inputs=inputs)\n        self.assertEqual(list(outputs['action'].shape[1:]),\n                         list(policy.action_space.shape))\n        self.assertIsInstance(outputs['dist'],\n                              tf.distributions.Categorical)\n\n\n        inputs = tf.random_uniform(shape=[64, 4])\n        outputs = policy(inputs=inputs)\n        self.assertEqual(list(outputs['action'].shape[1:]),\n                         list(policy.action_space.shape))\n        self.assertEqual(int(outputs['action'].shape[0]), 64)\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/modules/qnets/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library qnets.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.modules.qnets.qnets import *\n"
  },
  {
    "path": "texar_repo/texar/modules/qnets/qnets.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Q networks for RL.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.module_base import ModuleBase\nfrom texar.agents.agent_utils import Space\nfrom texar.utils import utils\n\n# pylint: disable=no-member\n\n__all__ = [\n    'QNetBase',\n    'CategoricalQNet'\n]\n\nclass QNetBase(ModuleBase):\n    \"\"\"Base class inheritted by all Q net classes. A Q net takes in states\n    and outputs Q value of actions.\n\n    Args:\n        network (optional): A network that takes in state and returns\n            Q values. For example, an instance of subclass\n            of :class:`~texar.modules.FeedForwardNetworkBase`. If `None`,\n            a network is created as specified in :attr:`hparams`.\n        network_kwargs (dict, optional): Keyword arguments for network\n            constructor.\n            Note that the `hparams` argument for network\n            constructor is specified in the \"network_hparams\" field of\n            :attr:`hparams` and should not be included in `network_kwargs`.\n            Ignored if :attr:`network` is given.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. 
See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n    \"\"\"\n    def __init__(self,\n                 network=None,\n                 network_kwargs=None,\n                 hparams=None):\n        ModuleBase.__init__(self, hparams=hparams)\n\n        with tf.variable_scope(self.variable_scope):\n            self._build_network(network, network_kwargs)\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. role:: python(code)\n           :language: python\n\n        .. code-block:: python\n\n            {\n                'network_type': 'FeedForwardNetwork',\n                'network_hparams': {\n                    'layers': [\n                        {\n                            'type': 'Dense',\n                            'kwargs': {'units': 256, 'activation': 'relu'}\n                        },\n                        {\n                            'type': 'Dense',\n                            'kwargs': {'units': 256, 'activation': 'relu'}\n                        },\n                    ]\n                },\n                'name': 'q_net',\n            }\n\n        Here:\n\n        \"network_type\" : str or class or instance\n            A network that takes in state and returns outputs for\n            generating actions. This can be a class, its name or module path,\n            or a class instance. Ignored if `network` is given to the\n            constructor.\n\n        \"network_hparams\" : dict\n            Hyperparameters for the network. 
With the :attr:`network_kwargs`\n            argument to the constructor, a network is created with\n            :python:`network_class(**network_kwargs, hparams=network_hparams)`.\n\n            For example, the default values creates a two-layer dense network.\n\n        \"name\" : str\n            Name of the Q net.\n        \"\"\"\n        return {\n            'network_type': 'FeedForwardNetwork',\n            'network_hparams': {\n                'layers': [\n                    {\n                        'type': 'Dense',\n                        'kwargs': {'units': 256, 'activation': 'relu'}\n                    },\n                    {\n                        'type': 'Dense',\n                        'kwargs': {'units': 256, 'activation': 'relu'}\n                    },\n                ]\n            },\n            'name': 'q_net',\n            '@no_typecheck': ['network_type', 'network_hparams']\n        }\n\n    def _build_network(self, network, kwargs):\n        if network is not None:\n            self._network = network\n        else:\n            kwargs = utils.get_instance_kwargs(\n                kwargs, self._hparams.network_hparams)\n            self._network = utils.check_or_get_instance(\n                self._hparams.network_type,\n                kwargs,\n                module_paths=['texar.modules', 'texar.custom'])\n\n    def _build(self, inputs, mode=None): # pylint: disable=arguments-differ\n        raise NotImplementedError\n\n    @property\n    def network(self):\n        \"\"\"The network.\n        \"\"\"\n        return self._network\n\n\nclass CategoricalQNet(QNetBase):\n    \"\"\"Q net with categorical scalar action space.\n\n    Args:\n        action_space (optional): An instance of :class:`~texar.agents.Space`\n            specifying the action space. 
If not given, an discrete action space\n            `[0, high]` is created with `high` specified in :attr:`hparams`.\n        network (optional): A network that takes in state and returns\n            Q values. For example, an instance of subclass\n            of :class:`~texar.modules.FeedForwardNetworkBase`. If `None`,\n            a network is created as specified in :attr:`hparams`.\n        network_kwargs (dict, optional): Keyword arguments for network\n            constructor.\n            Note that the `hparams` argument for network\n            constructor is specified in the \"network_hparams\" field of\n            :attr:`hparams` and should not be included in `network_kwargs`.\n            Ignored if :attr:`network` is given.\n        hparams (dict or HParams, optional): Hyperparameters. Missing\n            hyperparamerter will be set to default values. See\n            :meth:`default_hparams` for the hyperparameter sturcture and\n            default values.\n\n    .. document private functions\n    .. automethod:: _build\n    \"\"\"\n    def __init__(self,\n                 action_space=None,\n                 network=None,\n                 network_kwargs=None,\n                 hparams=None):\n        QNetBase.__init__(self, hparams=hparams)\n\n        with tf.variable_scope(self.variable_scope):\n            if action_space is None:\n                action_space = Space(\n                    low=0, high=self._hparams.action_space, dtype=np.int32)\n            self._action_space = action_space\n            self._append_output_layer()\n\n    @staticmethod\n    def default_hparams():\n        \"\"\"Returns a dictionary of hyperparameters with default values.\n\n        .. 
code-block:: python\n\n            {\n                'network_type': 'FeedForwardNetwork',\n                'network_hparams': {\n                    'layers': [\n                        {\n                            'type': 'Dense',\n                            'kwargs': {'units': 256, 'activation': 'relu'}\n                        },\n                        {\n                            'type': 'Dense',\n                            'kwargs': {'units': 256, 'activation': 'relu'}\n                        },\n                    ]\n                },\n                'action_space': 2,\n                'make_output_layer': True,\n                'name': 'q_net'\n            }\n\n        Here:\n\n        \"action_space\" : int\n            Upper bound of the action space. The resulting action space is\n            all discrete scalar numbers between 0 and the upper bound specified\n            here (both inclusive).\n\n        \"make_output_layer\" : bool\n            Whether to append a dense layer to the network to transform\n            features to Q values. 
If `False`, the final layer\n            output of network must match the action space.\n\n        See :class:`~texar.modules.QNetBase.default_hparams` for details\n        of other hyperparameters.\n        \"\"\"\n        hparams = QNetBase.default_hparams()\n        hparams.update({\n            'action_space': 2,\n            'make_output_layer': True})\n        return hparams\n\n    def _append_output_layer(self):\n        if not self._hparams.make_output_layer:\n            return\n\n        if self._action_space.shape != ():\n            raise ValueError('Only scalar discrete action is supported.')\n        else:\n            output_size = self._action_space.high - self._action_space.low\n\n        layer_hparams = {\n            'type': 'Dense',\n            'kwargs': {'units': output_size}}\n        self._network.append_layer(layer_hparams)\n\n    def _build(self, inputs, mode=None):\n        \"\"\"Takes in states and outputs Q values.\n\n        Args:\n            inputs: Inputs to the Q net with the first dimension\n                the batch dimension.\n            mode (optional): A tensor taking value in\n                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including\n                `TRAIN`, `EVAL`, and `PREDICT`. 
If `None`,\n                :func:`texar.global_mode` is used.\n\n        Returns\n            A `dict` including fields `\"qvalues\"`.\n            where\n\n            - **\"qvalues\"**: A Tensor of shape \\\n            `[batch_size] + action_space size` containing Q values of all\\\n            possible actions.\n        \"\"\"\n        outputs = {\n            \"qvalues\": self._network(inputs, mode=mode)\n        }\n\n        if not self._built:\n            self._add_internal_trainable_variables()\n            self._add_trainable_variable(self._network.trainable_variables)\n            self._built = True\n\n        return outputs\n\n    @property\n    def action_space(self):\n        \"\"\"An instance of :class:`~texar.agents.Space` specifiying the\n        action space.\n        \"\"\"\n        return self._action_space\n"
  },
  {
    "path": "texar_repo/texar/run/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library run.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.run.executor import *\n"
  },
  {
    "path": "texar_repo/texar/run/executor.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nA class that executes training, evaluation, prediction, export of estimators.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.utils.dtypes import maybe_hparams_to_dict\n\n# pylint: disable=too-many-instance-attributes, too-many-arguments\n\n__all__ = [\n    \"Executor\"\n]\n\nclass Executor(object):\n    \"\"\"Class that executes training, evaluation, prediction, export, and other\n    actions of :tf_main:`Estimator <estimator/Estimator>`.\n\n    Args:\n        model: An instance of a subclass of\n            :class:`~texar.models.model_base.ModelBase`.\n        data_hparams: A `dict` or an instance of :class:`~texar.hparams.HParams`\n            containing the hyperparameters of data. It must contain `train`\n            and/or `eval` fields for relevant processes. 
For example, for\n            :meth:`train_and_evaluate`, both fields are required.\n        config: An instance of\n            :tf_main:`tf.estimator.RunConfig <estimator/RunConfig>`, used as\n            the :attr:`config` argument of\n            :tf_main:`Estimator <estimator/Estimator#__init__>`.\n        model_hparams (optional): A `dict` or an instance of\n            :class:`~texar.hparams.HParams` containing the hyperparameters of\n            the model. If `None`, uses :attr:`model.hparams`. Used as\n            the :attr:`params` argument of\n            :tf_main:`Estimator <estimator/Estimator#__init__>`.\n        train_hooks (optional): Iterable of :tf_main:`tf.train.SessionRunHook\n            <train/SessionRunHook>` objects to run during training.\n        eval_hooks (optional): Iterable of :tf_main:`tf.train.SessionRunHook\n            <train/SessionRunHook>` objects to run during evaluation.\n        session_config (optional): An instance of\n            :tf_main:`tf.ConfigProto <ConfigProto>`, used as the :attr:`config`\n            argument of :tf_main:`tf session <Session>`.\n\n    Example:\n\n        .. 
code-block:: python\n\n            model = BasicSeq2seq(data_hparams, model_hparams)\n            exor = Executor(\n                model=model,\n                data_hparams=data_hparams,\n                config=run_config)\n            exor.train_and_evaluate(\n                max_train_steps=10000,\n                eval_steps=100)\n\n    See `bin/train.py` for the usage in detail.\n    \"\"\"\n\n    def __init__(self,\n                 model,\n                 data_hparams,\n                 config,\n                 model_hparams=None,\n                 train_hooks=None,\n                 eval_hooks=None,\n                 session_config=None):\n        self._model = model\n        self._data_hparams = maybe_hparams_to_dict(data_hparams)\n        self._config = config\n        self._train_hooks = train_hooks\n        self._eval_hooks = eval_hooks\n        self._session_config = session_config\n\n        if model_hparams is None:\n            model_hparams = model.hparams\n        self._model_hparams = maybe_hparams_to_dict(model_hparams)\n\n        self._estimator = tf.estimator.Estimator(\n            model_fn=self._model, config=config, params=self._model_hparams)\n\n    def _get_train_spec(self, max_steps=None):\n        if 'train' not in self._data_hparams:\n            raise ValueError('`data_hparams` must contain field `train` for '\n                             'training data config.')\n        input_fn = self._model.get_input_fn(\n            mode=tf.estimator.ModeKeys.TRAIN,\n            hparams=self._data_hparams['train'])\n        return tf.estimator.TrainSpec(\n            input_fn=input_fn,\n            max_steps=max_steps,\n            hooks=self._train_hooks)\n\n    def _get_eval_spec(self, steps):\n        if 'eval' not in self._data_hparams:\n            raise ValueError('`data_hparams` must contain field `eval` for '\n                             'evaluation data config.')\n        input_fn = self._model.get_input_fn(\n            
mode=tf.estimator.ModeKeys.EVAL,\n            hparams=self._data_hparams['eval'])\n        return tf.estimator.EvalSpec(\n            input_fn=input_fn,\n            steps=steps,\n            hooks=self._eval_hooks)\n\n    def train(self, max_steps=None):\n        \"\"\"Trains the model. See :tf_main:`tf.estimator.Estimator.train\n        <estimator/Estimator#train>` for more details.\n\n        Args:\n            max_steps (int, optional): Total number of steps for which\n                to train model. If `None`, train forever or until the train\n                data generates the OutOfRange exception. If OutOfRange occurs\n                in the middle, training stops before :attr:`max_steps` steps.\n        \"\"\"\n        train_spec = self._get_train_spec(max_steps=max_steps)\n        self._estimator.train(\n            input_fn=train_spec.input_fn,\n            hooks=train_spec.hooks,\n            max_steps=train_spec.max_steps)\n\n    def evaluate(self, steps=None, checkpoint_path=None):\n        \"\"\"Evaluates the model. See :tf_main:`tf.estimator.Estimator.evaluate\n        <estimator/Estimator#evaluate>` for more details.\n\n        Args:\n            steps (int, optional): Number of steps for which to evaluate\n                model. If `None`, evaluates until the eval data raises an\n                OutOfRange exception.\n            checkpoint_path (str, optional): Path of a specific checkpoint to\n                evaluate. If `None`, the the latest checkpoint in\n                :attr:`config.model_dir` is used. 
If there are no checkpoints\n                in :attr:`model_dir`, evaluation is run with newly initialized\n                variables instead of restored from checkpoint.\n        \"\"\"\n        eval_spec = self._get_eval_spec(steps=steps)\n        self._estimator.evaluate(\n            input_fn=eval_spec.input_fn,\n            steps=eval_spec.steps,\n            hooks=eval_spec.hooks,\n            checkpoint_path=checkpoint_path)\n\n    def train_and_evaluate(self, max_train_steps=None, eval_steps=None):\n        \"\"\"Trains and evaluates the model. See\n        :tf_main:`tf.estimator.train_and_evaluate\n        <estimator/train_and_evaluate>` for more details.\n\n        Args:\n            max_train_steps (int, optional): Total number of steps for which\n                to train model. If `None`, train forever or until the train\n                data generates the OutOfRange exception. If OutOfRange occurs\n                in the middle, training stops before :attr:`max_steps` steps.\n            eval_steps (int, optional): Number of steps for which to evaluate\n                model. If `None`, evaluates until the eval data raises an\n                OutOfRange exception.\n        \"\"\"\n        train_spec = self._get_train_spec(max_steps=max_train_steps)\n        eval_spec = self._get_eval_spec(steps=eval_steps)\n        tf.estimator.train_and_evaluate(self._estimator, train_spec, eval_spec)\n\n"
  },
  {
    "path": "texar_repo/texar/run/executor_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for executor.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tempfile\nimport shutil\n\nimport tensorflow as tf\n\nfrom texar.run.executor import Executor\nfrom texar.models.seq2seq.basic_seq2seq import BasicSeq2seq\n\nclass ExecutorTest(tf.test.TestCase):\n    \"\"\"Tests :class:`texar.run.executor.Executor`\n    \"\"\"\n\n    def setUp(self):\n        tf.test.TestCase.setUp(self)\n\n        # Create data\n        vocab_list = ['This', 'is', 'a', 'word', '词']\n        vocab_file = tempfile.NamedTemporaryFile()\n        vocab_file.write('\\n'.join(vocab_list).encode(\"utf-8\"))\n        vocab_file.flush()\n        self._vocab_file = vocab_file\n        self._vocab_size = len(vocab_list)\n\n        src_text = ['This is a sentence from source .', '词 词 。 source']\n        src_text_file = tempfile.NamedTemporaryFile()\n        src_text_file.write('\\n'.join(src_text).encode(\"utf-8\"))\n        src_text_file.flush()\n        self._src_text_file = src_text_file\n\n        tgt_text = ['This is a sentence from target .', '词 词 。 target']\n        tgt_text_file = tempfile.NamedTemporaryFile()\n        tgt_text_file.write('\\n'.join(tgt_text).encode(\"utf-8\"))\n        tgt_text_file.flush()\n        self._tgt_text_file = tgt_text_file\n\n        self._data_hparams = {\n            \"num_epochs\": 20,\n            \"batch_size\": 2,\n            \"source_dataset\": {\n                \"files\": [self._src_text_file.name],\n                \"vocab_file\": self._vocab_file.name,\n            },\n            \"target_dataset\": {\n                \"files\": self._tgt_text_file.name,\n                \"vocab_share\": True,\n            }\n        }\n\n    def test_execute_seq2seq(self):\n        \"\"\"Tests running seq2seq with Executor.\n        \"\"\"\n        seq2seq = 
BasicSeq2seq(self._data_hparams)\n        data_hparams = {'train': self._data_hparams, 'eval': self._data_hparams}\n\n        model_dir = tempfile.mkdtemp()\n        config = tf.estimator.RunConfig(\n            model_dir=model_dir,\n            save_summary_steps=10,\n            save_checkpoints_steps=10,\n            save_checkpoints_secs=None)\n\n        exor = Executor(model=seq2seq, data_hparams=data_hparams, config=config)\n\n        exor.train_and_evaluate(max_train_steps=20, eval_steps=5)\n\n        exor.train(max_steps=20)\n        exor.evaluate(steps=5)\n\n        shutil.rmtree(model_dir)\n\nif __name__ == \"__main__\":\n    tf.test.main()\n"
  },
  {
    "path": "texar_repo/texar/utils/__init__.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nModules of texar library utils.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=wildcard-import\n\nfrom texar.utils.utils import *\nfrom texar.utils.exceptions import *\nfrom texar.utils.shapes import *\nfrom texar.utils.dtypes import *\nfrom texar.utils.variables import *\nfrom texar.utils.mode import *\nfrom texar.utils.average_recorder import *\nfrom texar.utils.utils_io import *\n"
  },
  {
    "path": "texar_repo/texar/utils/average_recorder.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nUtilities for maintaining moving average.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nfrom collections import deque\n\n# pylint: disable=invalid-name\n\n__all__ = [\n    \"_SingleAverageRecorder\",\n    \"AverageRecorder\"\n]\n\nclass _SingleAverageRecorder(object):\n    \"\"\"Maintains the moving average (i.e., the average of the latest N records)\n    of a single metric.\n\n    Args:\n        size (int, optional): The window size of moving average. If `None`,\n            the average of all added records is maintained.\n        name (str, optional): name of the recorder. Used when printing.\n    \"\"\"\n\n    def __init__(self, size=None, name=None):\n        if size is not None and size <= 0:\n            raise ValueError(\"`size` must be > 0 or `None`.\")\n        self._size = size\n        self._q = deque([])\n        self._w = deque([])\n        self._sum = 0.\n        self._w_sum = 0\n        self._name = name\n\n    def add(self, record, weight=None):\n        \"\"\"Appends a new record.\n\n        Args:\n            record: A scalar; the new record to append.\n            weight (optional): A scalar, weight of the new record for\n                calculating a weighted average. 
If `None`, weight is set to `1`.\n                For example, :attr:`weight` can be set to batch size and\n                :attr:`record` the average value of certain metric on the batch\n                in order to calculate the average metric value on a whole\n                dataset.\n\n        Returns:\n            The (moving) average after appending the record.\n        \"\"\"\n        w = weight if weight is not None else 1\n        self._w_sum += w\n        self._sum += record * w\n\n        if self._size is not None:\n            if len(self._q) == self._size:\n                w_pop = self._w.popleft()\n                self._sum -= self._q.popleft() * w_pop\n                self._w_sum -= w_pop\n            self._q.append(record)\n            self._w.append(w)\n\n        return self.avg()\n\n    def avg(self):\n        \"\"\"Returns the (moving) average.\n        \"\"\"\n        if self._w_sum == 0:\n            return 0.\n        return self._sum / self._w_sum\n\n    def reset(self):\n        \"\"\"Cleans all records.\n        \"\"\"\n        self._q.clear()\n        self._w.clear()\n        self._sum = 0.\n        self._w_sum = 0\n\n    def to_str(self, precision=None):\n        \"\"\"Returns a string of the average value.\n\n        Args:\n            precision (int, optional): The number of decimal places to keep in\n                the returned string. E.g., for an average value of `0.1234`,\n                :attr:`precision = 2` leads to `'0.12'`.\n\n        Returns:\n            A string of the average value. 
If :meth:`name` is given, the\n            string is of the format like `'name: 0.1234'`, otherwise\n            the string is of the format like `'0.1234'`.\n        \"\"\"\n        prec_str = \"{}\"\n        if precision is not None:\n            prec_str = \"{:.%df}\" % precision\n\n        avg_str = prec_str.format(self.avg())\n        if self._name is not None:\n            avg_str = \"{}: {}\".format(self._name, avg_str)\n\n        return avg_str\n\n    @property\n    def name(self):\n        \"\"\"The name of the recorder.\n        \"\"\"\n        return self._name\n\nclass AverageRecorder(object):\n    \"\"\"Maintains the moving averages (i.e., the average of the latest N\n    records) of (possibly multiple) fields.\n\n    Fields are determined by the first call of :meth:`add`.\n\n    Args:\n        size (int, optional): The window size of moving average. If `None`,\n            the average of all added records is maintained.\n\n    Example:\n\n        .. code-block:: python\n\n            ## Use to maintain moving average of training loss\n            avg_rec = AverageRecorder(size=10) # average over latest 10 records\n            while training:\n                loss_0, loss_1  = ...\n                avg_rec.add([loss_0, loss_1])\n                # avg_rec.avg() == [0.12343452, 0.567800323]\n                # avg_rec.avg(0) == 0.12343452\n                # avg_rec.to_str(precision=2, ) == '0.12 0.57'\n\n            ## Use to maintain average of test metrics on the whole test set\n            avg_rec = AverageRecorder() # average over ALL records\n            while test:\n                metric_0, metric_1  = ...\n                avg_rec.add({'m0': metric_0, 'm1': metric_1}) # dict is allowed\n            print(avg_rec.to_str(precision=4, delimiter=' , '))\n            # 'm0: 0.1234 , m1: 0.5678'\n            #\n            # avg_rec.avg() == {'m0': 0.12343452, 'm1': 0.567800323}\n            # avg_rec.avg(0) == 0.12343452\n\n    \"\"\"\n\n    def 
__init__(self, size=None):\n        if size is not None and size <= 0:\n            raise ValueError(\"`size` must be > 0 or `None`.\")\n        self._size = size\n        self._recorders = None\n        self._default_metric_name = \"metric\"\n        self._record_type = None\n\n    def _to_dict(self, record):\n        if isinstance(record, dict):\n            record_dict = record\n        elif isinstance(record, (list, tuple)):\n            record_dict = {i: vi for i, vi in enumerate(record)}\n        else:\n            record_dict = {self._default_metric_name: record}\n        return record_dict\n\n    def add(self, record, weight=None):\n        \"\"\"Appends a new record.\n\n        :attr:`record` can be a `list`, `dict`, or a single scalar. The\n        record type is determined at the first time :meth:`add` is called.\n        All subsequent calls to :meth:`add` must have the same type of\n        :attr:`record`.\n\n        :attr:`record` in subsequent calls to :meth:`add` can contain only\n        a subset of fields than the first call to :meth:`add`.\n\n        Example:\n\n            .. code-block:: python\n\n                recorder.add({'1': 0.2, '2': 0.2}) # 1st call to `add`\n                x = recorder.add({'1': 0.4}) # 2nd call to `add`\n                # x == {'1': 0.3, '2': 0.2}\n\n        Args:\n            record: A single scalar, a list of scalars, or a dict of scalars.\n            weight (optional): A scalar, weight of the new record for\n                calculating a weighted average. 
If `None`, weight is set to `1`.\n                For example, :attr:`weight` can be set to batch size and\n                :attr:`record` the average value of certain metrics on the batch\n                in order to calculate the average metric values on a whole\n                dataset.\n\n        Returns:\n            The (moving) average after appending the record, with the same\n            type as :attr:`record`.\n        \"\"\"\n        if self._record_type is None:\n            self._record_type = type(record)\n        elif self._record_type != type(record):\n            raise ValueError('The type of `record` is not consistent. '\n                             'Expect type `{}`'.format(self._record_type))\n\n        record_dict = self._to_dict(record)\n        if self._recorders is None:\n            self._recorders = {\n                name: _SingleAverageRecorder(\n                    self._size, name if self._record_type == dict else None)\n                for name in record_dict.keys()\n            }\n\n        for name, val in record_dict.items():\n            self._recorders[name].add(val, weight=weight)\n\n        return self.avg()\n\n    def avg(self, id_or_name=None):\n        \"\"\"Returns the (moving) average.\n\n        Args:\n            id_or_name (optional): A list of or a single element.\n                Each element is the index (if the record type is `list`) or\n                name (if the record type is `dict`) of the field for which\n                the average is calculated. If not given, the average of all\n                fields are returned.\n\n        Returns:\n            The average value(s). If :attr:`id_or_name` is a single element\n            (not a list), then returns the average value of the corresponding\n            field. 
Otherwise, if :attr:`id_or_name` is a list of element(s),\n            then returns average value(s) in the same type as :attr:`record`\n            of :meth:`add`.\n        \"\"\"\n        if self._recorders is None:\n            return 0.\n\n        keys = id_or_name\n        if keys is None:\n            keys = list(self._recorders.keys())\n\n        if not isinstance(keys, (list, tuple)):\n            return self._recorders[keys].avg()\n\n        avg = {key: self._recorders[key].avg() for key in keys}\n        if self._record_type in {list, tuple}:\n            ret_avg = []\n            for k, v in avg.items():\n                if k in keys:\n                    ret_avg.append(v)\n            return self._record_type(ret_avg)\n        elif self._record_type == dict:\n            return avg\n        else:\n            return avg[self._default_metric_name]\n\n    def reset(self, id_or_name=None):\n        \"\"\"Resets the record.\n\n        Args:\n            id_or_name (optional): A list or a single element. Each element is\n                the index (if the record type is `list`) or name (if the\n                record type is `dict`) of the field to reset.\n                If `None`, all fields are reset.\n        \"\"\"\n        keys = id_or_name\n        if keys is None:\n            keys = list(self._recorders.keys())\n        elif not isinstance(keys, (list, tuple)):\n            keys = [keys]\n\n        for key in keys:\n            self._recorders[key].reset()\n\n    def to_str(self, precision=None, delimiter=' '):\n        \"\"\"Returns a string of the average values of the records.\n\n        Args:\n            precision (int, optional): The number of decimal places to keep in\n                the returned string. 
E.g., for an average value of `0.1234`,\n                :attr:`precision = 2` leads to `'0.12'`.\n            delimiter (str): The delimiter string that separates between\n                fields.\n\n        Returns:\n            A string of the average values.\n\n            If record is of type `dict`, the string is a concatenation of\n            'field_name: average_value', delimited with :attr:`delimiter`.\n            E.g., `'field_name_1: 0.1234 field_name_2: 0.5678 ...'`.\n\n            Otherwise, the string is of a concatenation of 'average_value'.\n            E.g., `'0.1234 0.5678 ...'`\n        \"\"\"\n        strs = {name: rec.to_str(precision=precision)\n                for name, rec in self._recorders.items()}\n        str_list = []\n        if self._record_type in {list, tuple}:\n            for i in range(len(strs)):# pylint: disable=consider-using-enumerate\n                # Enumerates the keys in order, which are the indexes\n                str_list.append(strs[i])\n        elif self._record_type == dict:\n            str_list = list(strs.values())\n        else:\n            str_list = [strs[self._default_metric_name]]\n\n        avg_str = delimiter.join(str_list)\n\n        return avg_str\n"
  },
  {
    "path": "texar_repo/texar/utils/average_recorder_test.py",
    "content": "\"\"\"\nUnit tests for average recoder.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.utils.average_recorder import _SingleAverageRecorder, AverageRecorder\n\n\nclass AverageRecorderTest(tf.test.TestCase):\n    \"\"\"Tests average recoder.\n    \"\"\"\n\n    def test_single_average_recoder(self):\n        \"\"\"Tests :class:`~texar.utils._SingleAverageRecorder`\n        \"\"\"\n        recoder = _SingleAverageRecorder(5)\n        for i in range(100):\n            self.assertEqual(recoder.add(1), 1.)\n            self.assertEqual(recoder.avg(), 1.)\n\n        recoder = _SingleAverageRecorder()\n        for i in range(100):\n            self.assertEqual(recoder.add(1), 1.)\n            self.assertEqual(recoder.avg(), 1.)\n\n        def _cal_ground_truth(n):\n            \"\"\"Calculates ((n-4)^2 + ... + n^5) / (n-4 + ... + n)\n            \"\"\"\n            lb = max(n-4, 0)\n            _sum = 0\n            _w = 0\n            for i in range(lb, n+1):\n                _sum += i * i\n                _w += i\n            if _w == 0:\n                return 0\n            return _sum / _w\n\n        recoder = _SingleAverageRecorder(5)\n        for i in range(100):\n            self.assertEqual(recoder.add(i, i), _cal_ground_truth(i))\n            self.assertEqual(recoder.avg(), _cal_ground_truth(i))\n\n    def test_average_recorder(self):\n        \"\"\"Tests :class:`~texar.utils.AverageRecorder`\n        \"\"\"\n        recorder = AverageRecorder(5)\n        for i in range(100):\n            self.assertEqual(recorder.add([1., 2.]), [1., 2.])\n            self.assertEqual(recorder.add([1.]), [1., 2.])\n            self.assertEqual(recorder.avg(), [1., 2.])\n            self.assertEqual(recorder.avg(0), 1.)\n            self.assertEqual(recorder.avg(1), 2.)\n            self.assertEqual(recorder.avg([0, 1]), [1., 2.])\n\n        recorder 
= AverageRecorder()\n        for i in range(100):\n            self.assertEqual(recorder.add({'1': 1, '2': 2}), {'1': 1., '2': 2.})\n            self.assertEqual(recorder.add({'1': 1}), {'1': 1., '2': 2.})\n            self.assertEqual(recorder.avg(), {'1': 1., '2': 2.})\n            self.assertEqual(recorder.avg('1'), 1.)\n            self.assertEqual(recorder.avg('2'), 2.)\n            self.assertEqual(recorder.avg(['1', '2']), {'1': 1., '2': 2.})\n\nif __name__ == \"__main__\":\n    tf.test.main()\n\n"
  },
  {
    "path": "texar_repo/texar/utils/beam_search.py",
    "content": "# coding=utf-8\n# Copyright 2018 The Tensor2Tensor Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#         http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n# Modifications copyright (C) 2018 Texar\n# ==============================================================================\n\"\"\"\nImplemetation of beam seach with penalties.\nAdapted from tensor2tensor repositor.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom tensorflow.python.util import nest\nfrom texar.utils.shapes import shape_list\n\n# Default value for INF\nINF = 1. 
* 1e7\n\ndef _merge_beam_dim(tensor):\n    \"\"\"Reshapes first two dimensions in to single dimension.\n\n    Args:\n        tensor: Tensor to reshape of shape [A, B, ...]\n\n    Returns:\n        Reshaped tensor of shape [A*B, ...]\n    \"\"\"\n    shape = shape_list(tensor)\n    shape[0] *= shape[1]    # batch -> batch * beam_size\n    shape.pop(1)    # Remove beam dim\n    return tf.reshape(tensor, shape)\n\n\ndef _unmerge_beam_dim(tensor, batch_size, beam_size):\n    \"\"\"Reshapes first dimension back to [batch_size, beam_size].\n\n    Args:\n        tensor: Tensor to reshape of shape [batch_size*beam_size, ...]\n        batch_size: Tensor, original batch size.\n        beam_size: int, original beam size.\n\n    Returns:\n        Reshaped tensor of shape [batch_size, beam_size, ...]\n    \"\"\"\n    shape = shape_list(tensor)\n    new_shape = [batch_size] + [beam_size] + shape[1:]\n    return tf.reshape(tensor, new_shape)\n\n\ndef _expand_to_beam_size(tensor, beam_size):\n    \"\"\"Tiles a given tensor by beam_size.\n\n    Args:\n        tensor: tensor to tile [batch_size, ...]\n        beam_size: How much to tile the tensor by.\n\n    Returns:\n        Tiled tensor [batch_size, beam_size, ...]\n    \"\"\"\n    tensor = tf.expand_dims(tensor, axis=1)\n    tile_dims = [1] * tensor.shape.ndims\n    tile_dims[1] = beam_size\n\n    return tf.tile(tensor, tile_dims)\n\n\ndef get_state_shape_invariants(tensor):\n    \"\"\"Returns the shape of the tensor but sets middle dims to None.\"\"\"\n    shape = tensor.shape.as_list()\n    for i in range(1, len(shape) - 1):\n        shape[i] = None\n    return tf.TensorShape(shape)\n\n\ndef log_prob_from_logits(logits):\n    return logits - tf.reduce_logsumexp(logits, axis=-1, keepdims=True)\n\n\ndef compute_batch_indices(batch_size, beam_size):\n    \"\"\"Computes the i'th coodinate that contains the batch index for\n    gathers.\n\n    Batch pos is a tensor like [[0,0,0,0,],[1,1,1,1],..]. 
It says which\n    batch the beam item is in. This will create the i of the i,j coordinate\n    needed for the gather.\n\n    Args:\n        batch_size: Batch size\n        beam_size: Size of the beam.\n    Returns:\n        batch_pos: [batch_size, beam_size] tensor of ids\n    \"\"\"\n    batch_pos = tf.range(batch_size * beam_size) // beam_size\n    batch_pos = tf.reshape(batch_pos, [batch_size, beam_size])\n    return batch_pos\n\n\ndef compute_topk_scores_and_seq(sequences, scores, scores_to_gather, flags,\n                                beam_size, batch_size, prefix=\"default\",\n                                states_to_gather=None):\n    \"\"\"Given sequences and scores, will gather the top k=beam size\n    sequences.\n\n    This function is used to grow alive, and finished. It takes sequences,\n    scores, and flags, and returns the top k from sequence\n    scores_to_gather, and flags based on the values in scores.\n\n    This method permits easy introspection using tfdbg. It adds three\n    named ops that are prefixed by `prefix`:\n        - _topk_seq: the tensor for topk_seq returned by this method.\n        - _topk_flags: the tensor for topk_finished_flags returned by this\n            method.\n        - _topk_scores: the tensor for tokp_gathered_scores returned by\n            this method.\n\n    Args:\n        sequences: Tensor of sequences that we need to gather from.\n            [batch_size, beam_size, seq_length]\n        scores: Tensor of scores for each sequence in sequences.\n            [batch_size, beam_size]. We will use these to compute the topk.\n        scores_to_gather: Tensor of scores for each sequence in sequences.\n            [batch_size, beam_size]. 
We will return the gathered scores\n            from here.\n            Scores to gather is different from scores because for\n            grow_alive, we will need to return log_probs, while for\n            grow_finished, we will need to return the length penalized\n            scors.\n        flags: Tensor of bools for sequences that say whether a sequence\n        has reached EOS or not\n        beam_size: int\n        batch_size: int\n        prefix: string that will prefix unique names for the ops run.\n        states_to_gather: dict (possibly nested) of decoding states.\n    Returns:\n        Tuple of\n        (topk_seq [batch_size, beam_size, decode_length],\n         topk_gathered_scores [batch_size, beam_size],\n         topk_finished_flags[batch_size, beam_size])\n    \"\"\"\n    _, topk_indexes = tf.nn.top_k(scores, k=beam_size)\n    # The next three steps are to create coordinates for tf.gather_nd to\n    # pull out the topk sequences from sequences based on scores.\n    # batch pos is a tensor like [[0,0,0,0,],[1,1,1,1],..]. It says which\n    # batch the beam item is in. This will create the i of the i,j\n    # coordinate needed for the gather\n    batch_pos = compute_batch_indices(batch_size, beam_size)\n\n    # top coordinates will give us the actual coordinates to do the gather.\n    # stacking will create a tensor of dimension batch * beam * 2, where\n    # the last dimension contains the i,j gathering coordinates.\n    top_coordinates = tf.stack([batch_pos, topk_indexes], axis=2)\n\n    # Gather up the highest scoring sequences.    For each operation\n    # added, give it a concrete name to simplify observing these\n    # operations with tfdbg. 
Clients can capture these tensors by watching\n    # these node names.\n    def gather(tensor, name):\n        return tf.gather_nd(tensor, top_coordinates, name=(prefix + name))\n    topk_seq = gather(sequences, \"_topk_seq\")\n    topk_flags = gather(flags, \"_topk_flags\")\n    topk_gathered_scores = gather(scores_to_gather, \"_topk_scores\")\n    if states_to_gather:\n        topk_gathered_states = nest.map_structure(\n            lambda state: gather(state, \"_topk_states\"), states_to_gather)\n    else:\n        topk_gathered_states = states_to_gather\n    return topk_seq, topk_gathered_scores, topk_flags, topk_gathered_states\n\n\ndef beam_search(symbols_to_logits_fn,\n                                initial_ids,\n                                beam_size,\n                                decode_length,\n                                vocab_size,\n                                alpha,\n                                eos_id,\n                                states=None,\n                                stop_early=True):\n    \"\"\"Beam search with length penalties.\n\n    Requires a function that can take the currently decoded sybmols and\n    return the logits for the next symbol. The implementation is inspired\n    by https://arxiv.org/abs/1609.08144.\n\n    When running, the beam search steps can be visualized by using tfdbg to\n    watch the operations generating the output ids for each beam step.\n    These operations have the pattern:\n        (alive|finished)_topk_(seq,scores)\n\n    Operations marked `alive` represent the new beam sequences that will be\n    processed in the next step.    
Operations marked `finished` represent\n    the completed beam sequences, which may be padded with 0s if no beams\n    finished.\n\n    Operations marked `seq` store the full beam sequence for the time step.\n    Operations marked `scores` store the sequence's final log scores.\n\n    The beam search steps will be processed sequentially in order, so when\n    capturing observed from these operations, tensors, clients can make\n    assumptions about which step is being recorded.\n\n    WARNING: Assumes 2nd dimension of tensors in `states` and not\n    invariant, this means that the shape of the 2nd dimension of these\n    tensors will not be available (i.e. set to None) inside\n    symbols_to_logits_fn.\n\n    Args:\n        symbols_to_logits_fn: Interface to the model, to provide logits.\n                Shoud take [batch_size, decoded_ids] and return\n                [batch_size, vocab_size]\n        initial_ids: Ids to start off the decoding, this will be the first\n        thing handed to symbols_to_logits_fn (after expanding to beam size)\n            [batch_size]\n        beam_size: Size of the beam.\n        decode_length: Number of steps to decode for.\n        vocab_size: Size of the vocab, must equal the size of the logits\n        returned by symbols_to_logits_fn\n        alpha: alpha for length penalty.\n        states: dict (possibly nested) of decoding states.\n        eos_id: ID for end of sentence.\n        stop_early: a boolean - stop once best sequence is provably\n        determined.\n    Returns:\n        Tuple of\n        (decoded beams [batch_size, beam_size, decode_length]\n         decoding probablities [batch_size, beam_size])\n    \"\"\"\n    batch_size = shape_list(initial_ids)[0]\n\n    # Assume initial_ids are prob 1.0\n    initial_log_probs = tf.constant([[0.] 
+ [-float(\"inf\")] * (\n        beam_size - 1)])\n    # Expand to beam_size (batch_size, beam_size)\n    alive_log_probs = tf.tile(initial_log_probs, [batch_size, 1])\n\n    # Expand each batch and state to beam_size\n    alive_seq = _expand_to_beam_size(initial_ids, beam_size)\n    alive_seq = tf.expand_dims(alive_seq, axis=2)\n    #(batch_size, beam_size, 1)\n    if states:\n        states = nest.map_structure(\n            lambda state: _expand_to_beam_size(state, beam_size),\n                states)\n    else:\n        states = {}\n\n    # Finished will keep track of all the sequences that have finished so\n    # far\n    # Finished log probs will be negative infinity in the beginning\n    # finished_flags will keep track of booleans\n    finished_seq = tf.zeros(shape_list(alive_seq), tf.int32)\n    # Setting the scores of the initial to negative infinity.\n    finished_scores = tf.ones([batch_size, beam_size]) * -INF\n    finished_flags = tf.zeros([batch_size, beam_size], tf.bool)\n\n    def grow_finished(finished_seq, finished_scores, finished_flags,\n        curr_seq, curr_scores, curr_finished):\n        \"\"\"Given sequences and scores, will gather the top k=beam size\n        sequences.\n\n        Args:\n            finished_seq: Current finished sequences.\n                [batch_size, beam_size, current_decoded_length]\n            finished_scores: scores for each of these sequences.\n                [batch_size, beam_size]\n            finished_flags: finished bools for each of these sequences.\n                [batch_size, beam_size]\n            curr_seq: current topk sequence that has been grown by one\n            position.\n                [batch_size, beam_size, current_decoded_length]\n            curr_scores: scores for each of these sequences. 
[batch_size,\n            beam_size]\n            curr_finished: Finished flags for each of these sequences.\n                [batch_size, beam_size]\n        Returns:\n            Tuple of\n                (Topk sequences based on scores,\n                 log probs of these sequences,\n                 Finished flags of these sequences)\n        \"\"\"\n        # First append a column of 0'ids to finished to make the same\n        # length with finished scores\n        finished_seq = tf.concat(\n                [finished_seq,\n                 tf.zeros([batch_size, beam_size, 1], tf.int32)], axis=2)\n\n        # Set the scores of the unfinished seq in curr_seq to large\n        # negative values\n        curr_scores += (1. - tf.to_float(curr_finished)) * -INF\n        # concatenating the sequences and scores along beam axis\n        curr_finished_seq = tf.concat([finished_seq, curr_seq], axis=1)\n        curr_finished_scores = tf.concat([finished_scores, curr_scores],\n            axis=1)\n        curr_finished_flags = tf.concat([finished_flags, curr_finished],\n            axis=1)\n        return compute_topk_scores_and_seq(\n            curr_finished_seq, curr_finished_scores, curr_finished_scores,\n                curr_finished_flags, beam_size, batch_size,\n                \"grow_finished\")\n\n    def grow_alive(curr_seq, curr_scores, curr_log_probs, curr_finished,\n        states):\n        \"\"\"Given sequences and scores, will gather the top k=beam size\n        sequences.\n\n        Args:\n            curr_seq: current topk sequence that has been grown by one\n            position.\n                [batch_size, beam_size, i+1]\n            curr_scores: scores for each of these sequences. 
[batch_size,\n                beam_size]\n            curr_log_probs: log probs for each of these sequences.\n                [batch_size, beam_size]\n            curr_finished: Finished flags for each of these sequences.\n                [batch_size, beam_size]\n            states: dict (possibly nested) of decoding states.\n        Returns:\n            Tuple of\n                (Topk sequences based on scores,\n                 log probs of these sequences,\n                 Finished flags of these sequences)\n        \"\"\"\n        # Set the scores of the finished seq in curr_seq to large negative\n        # values\n        curr_scores += tf.to_float(curr_finished) * -INF\n        return compute_topk_scores_and_seq(curr_seq, curr_scores,\n            curr_log_probs, curr_finished, beam_size, batch_size,\n            \"grow_alive\", states)\n\n    def grow_topk(i, alive_seq, alive_log_probs, states):\n        r\"\"\"Inner beam seach loop.\n\n        This function takes the current alive sequences, and grows them to\n        topk sequences where k = 2*beam. We use 2*beam because, we could\n        have beam_size number of sequences that might hit <EOS> and there\n        will be no alive sequences to continue. With 2*beam_size, this\n        will not happen. This relies on the assumption the vocab size is >\n        beam size. 
If this is true, we'll have at least beam_size non\n        <EOS> extensions if we extract the next top 2*beam words.\n        Length penalty is given by = (5+len(decode)/6) ^ -\\alpha.\n        Pls refer to https://arxiv.org/abs/1609.08144.\n\n        Args:\n            i: loop index\n            alive_seq: Topk sequences decoded so far [batch_size,\n                beam_size, i+1]\n            alive_log_probs: probabilities of these sequences.\n                [batch_size, beam_size]\n            states: dict (possibly nested) of decoding states.\n        Returns:\n            Tuple of\n                (Topk sequences extended by the next word,\n                 The log probs of these sequences,\n                 The scores with length penalty of these sequences,\n                 Flags indicating which of these sequences have finished\n                 decoding, dict of transformed decoding states)\n        \"\"\"\n        # Get the logits for all the possible next symbols\n        flat_ids = tf.reshape(alive_seq, [batch_size * beam_size, -1])\n\n        # (batch_size * beam_size, decoded_length)\n        if states:\n            flat_states = nest.map_structure(_merge_beam_dim, states)\n            flat_logits, flat_states = symbols_to_logits_fn(flat_ids, i,\n                flat_states)\n            states = nest.map_structure(\n                lambda t: _unmerge_beam_dim(t, batch_size, beam_size),\n                flat_states)\n        else:\n            flat_logits = symbols_to_logits_fn(flat_ids)\n        logits = tf.reshape(flat_logits, [batch_size, beam_size, -1])\n\n        # Convert logits to normalized log probs\n        candidate_log_probs = log_prob_from_logits(logits)\n\n        # Multiply the probabilites by the current probabilites of the\n        # beam.\n        # (batch_size, beam_size, vocab_size) + (batch_size, beam_size, 1)\n        log_probs = candidate_log_probs + tf.expand_dims(alive_log_probs,\n            axis=2)\n\n        
length_penalty = tf.pow(((5. + tf.to_float(i + 1)) / 6.), alpha)\n\n        curr_scores = log_probs / length_penalty\n        # Flatten out (beam_size, vocab_size) probs in to a list of\n        # possibilites\n        flat_curr_scores = tf.reshape(curr_scores,\n            [-1, beam_size * vocab_size])\n\n        topk_scores, topk_ids = tf.nn.top_k(flat_curr_scores,\n            k=beam_size * 2)\n\n        # Recovering the log probs because we will need to send them back\n        topk_log_probs = topk_scores * length_penalty\n\n        # Work out what beam the top probs are in.\n        topk_beam_index = topk_ids // vocab_size\n        topk_ids %= vocab_size    # Unflatten the ids\n\n        # The next three steps are to create coordinates for tf.gather_nd\n        # to pull out the correct seqences from id's that we need to grow.\n        # We will also use the coordinates to gather the booleans of the\n        # beam items that survived.\n        batch_pos = compute_batch_indices(batch_size, beam_size * 2)\n\n        # top beams will give us the actual coordinates to do the gather.\n        # stacking will create a tensor of dimension batch * beam * 2,\n        # where the last dimension contains the i,j gathering coordinates.\n        topk_coordinates = tf.stack([batch_pos, topk_beam_index], axis=2)\n\n        # Gather up the most probable 2*beams both for the ids and\n        # finished_in_alive bools\n        topk_seq = tf.gather_nd(alive_seq, topk_coordinates)\n        if states:\n            states = nest.map_structure(\n                lambda state: tf.gather_nd(state, topk_coordinates),\n                    states)\n\n        # Append the most probable alive\n        topk_seq = tf.concat([topk_seq, tf.expand_dims(topk_ids, axis=2)],\n            axis=2)\n\n        topk_finished = tf.equal(topk_ids, eos_id)\n\n        return topk_seq, topk_log_probs, topk_scores, topk_finished, states\n\n    def inner_loop(i, alive_seq, alive_log_probs, finished_seq,\n     
       finished_scores, finished_flags, states):\n        \"\"\"Inner beam seach loop.\n\n        There are three groups of tensors, alive, finished, and topk.\n        The alive group contains information about the current alive\n        sequences. The topk group contains information about alive + topk\n        current decoded words the finished group contains information\n        about finished sentences, that is, the ones that have decoded to\n        <EOS>. These are what we return.\n        The general beam search algorithm is as follows:\n        While we haven't terminated (pls look at termination condition)\n            1. Grow the current alive to get beam*2 topk sequences\n            2. Among the topk, keep the top beam_size ones that haven't\n            reached EOS into alive\n            3. Among the topk, keep the top beam_size ones have reached\n            EOS into finished\n        Repeat\n        To make things simple with using fixed size tensors, we will end\n        up inserting unfinished sequences into finished in the beginning.\n        To stop that we add -ve INF to the score of the unfinished\n        sequence so that when a true finished sequence does appear, it\n        will have a higher score than all the unfinished ones.\n\n        Args:\n            i: loop index\n            alive_seq: Topk sequences decoded so far [batch_size,\n                beam_size, i+1]\n            alive_log_probs: probabilities of the beams. 
[batch_size,\n                beam_size]\n            finished_seq: Current finished sequences.\n                [batch_size, beam_size, i+1]\n            finished_scores: scores for each of these sequences.\n                [batch_size, beam_size]\n            finished_flags: finished bools for each of these sequences.\n                [batch_size, beam_size]\n            states: dict (possibly nested) of decoding states.\n\n        Returns:\n            Tuple of\n                (Incremented loop index\n                 New alive sequences,\n                 Log probs of the alive sequences,\n                 New finished sequences,\n                 Scores of the new finished sequences,\n                 Flags inidicating which sequence in finished as reached\n                 EOS,\n                 dict of final decoding states)\n        \"\"\"\n\n        # Each inner loop, we carry out three steps:\n        # 1. Get the current topk items.\n        # 2. Extract the ones that have finished and haven't finished\n        # 3. 
Recompute the contents of finished based on scores.\n        topk_seq, topk_log_probs, topk_scores, topk_finished, states =\\\n            grow_topk(i, alive_seq, alive_log_probs, states)\n        alive_seq, alive_log_probs, _, states = grow_alive(\n            topk_seq, topk_scores, topk_log_probs, topk_finished, states)\n        finished_seq, finished_scores, finished_flags, _ = grow_finished(\n            finished_seq, finished_scores, finished_flags, topk_seq,\n            topk_scores, topk_finished)\n\n        return (i + 1, alive_seq, alive_log_probs, finished_seq,\n            finished_scores, finished_flags, states)\n\n    def _is_finished(i, unused_alive_seq, alive_log_probs,\n            unused_finished_seq, finished_scores, finished_in_finished,\n            unused_states):\n        \"\"\"Checking termination condition.\n\n        We terminate when we decoded up to decode_length or the lowest\n        scoring item in finished has a greater score that the higest prob\n        item in alive divided by the max length penalty\n\n        Args:\n            i: loop index\n            alive_log_probs: probabilities of the beams. [batch_size,\n                beam_size]\n            finished_scores: scores for each of these sequences.\n                [batch_size, beam_size]\n            finished_in_finished: finished bools for each of these\n            sequences. [batch_size, beam_size]\n\n        Returns:\n            Bool.\n        \"\"\"\n        if not stop_early:\n            return tf.less(i, decode_length)\n        max_length_penalty = tf.pow(((5. 
+ tf.to_float(decode_length)) \\\n            / 6.), alpha)\n        # The best possible score of the most likley alive sequence\n        lower_bound_alive_scores = alive_log_probs[:, 0] /\\\n            max_length_penalty\n\n        # Now to compute the lowest score of a finished sequence in\n        # finished\n        # If the sequence isn't finished, we multiply it's score by 0.\n        # since scores are all -ve, taking the min will give us the score\n        # of the lowest finished item.\n        lowest_score_of_fininshed_in_finished = tf.reduce_min(\n            finished_scores * tf.to_float(finished_in_finished),\n            axis=1)\n        # If none of the sequences have finished, then the min will be 0\n        # and we have to replace it by -ve INF if it is. The score of any\n        # seq in alive will be much higher than -ve INF and the\n        # termination condition will not be met.\n        lowest_score_of_fininshed_in_finished += (\n            (1. - tf.to_float(tf.reduce_any(finished_in_finished,\n            1))) * -INF)\n\n        bound_is_met = tf.reduce_all(\n            tf.greater(lowest_score_of_fininshed_in_finished,\n            lower_bound_alive_scores))\n\n        return tf.logical_and(\n            tf.less(i, decode_length), tf.logical_not(bound_is_met))\n\n    (_, alive_seq, alive_log_probs, finished_seq, finished_scores,\n     finished_flags, _) = tf.while_loop(\n        _is_finished,\n        inner_loop, [\n            tf.constant(0), alive_seq, alive_log_probs, finished_seq,\n            finished_scores, finished_flags, states\n        ],\n        shape_invariants=[\n            tf.TensorShape([]),\n            tf.TensorShape([None, None, None]),\n            alive_log_probs.get_shape(),\n            tf.TensorShape([None, None, None]),\n            finished_scores.get_shape(),\n            finished_flags.get_shape(),\n            nest.map_structure(get_state_shape_invariants, states),\n        ],\n        
parallel_iterations=1,\n        back_prop=False)\n\n    alive_seq.set_shape((None, beam_size, None))\n    finished_seq.set_shape((None, beam_size, None))\n\n    # Accounting for corner case: It's possible that no sequence in alive\n    # for a particular batch item ever reached EOS. In that case, we\n    # should just copy the contents of alive for that batch item. tf\n    # reduce_any(finished_flags, 1)\n    # if 0, means that no sequence for that batch index had reached EOS.\n    # We need to do the same for the scores as well.\n    finished_seq = tf.where(\n        tf.reduce_any(finished_flags, 1), finished_seq, alive_seq)\n    finished_scores = tf.where(\n        tf.reduce_any(finished_flags, 1), finished_scores, alive_log_probs)\n    return finished_seq, finished_scores\n"
  },
  {
    "path": "texar_repo/texar/utils/dtypes.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nUtility functions related to data types.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\nfrom __future__ import unicode_literals\n\n# pylint: disable=invalid-name, no-member, protected-access\n\nimport six\nimport numpy as np\n\nimport tensorflow as tf\n\n__all__ = [\n    \"get_tf_dtype\",\n    \"is_callable\",\n    \"is_str\",\n    \"is_placeholder\",\n    \"maybe_hparams_to_dict\",\n    \"compat_as_text\"\n]\n\ndef get_tf_dtype(dtype): # pylint: disable=too-many-return-statements\n    \"\"\"Returns equivalent tf dtype.\n\n    Args:\n        dtype: A str, python numeric or string type, numpy data type, or\n            tf dtype.\n\n    Returns:\n        The corresponding tf dtype.\n    \"\"\"\n    if dtype in {'float', 'float32', 'tf.float32', float,\n                 np.float32, tf.float32}:\n        return tf.float32\n    elif dtype in {'float64', 'tf.float64', np.float64, np.float_, tf.float64}:\n        return tf.float64\n    elif dtype in {'float16', 'tf.float16', np.float16, tf.float16}:\n        return tf.float16\n    elif dtype in {'int', 'int32', 'tf.int32', int, np.int32, tf.int32}:\n        return tf.int32\n    elif dtype in {'int64', 'tf.int64', np.int64, tf.int64}:\n        return tf.int64\n    elif dtype in {'int16', 'tf.int16', np.int16, 
tf.int16}:\n        return tf.int16\n    elif dtype in {'bool', 'tf.bool', bool, np.bool_, tf.bool}:\n        return tf.bool\n    elif dtype in {'string', 'str', 'tf.string', str, np.str, tf.string}:\n        return tf.string\n    try:\n        if dtype in {'unicode', unicode}:\n            return tf.string\n    except NameError:\n        pass\n\n    raise ValueError(\n        \"Unsupported conversion from type {} to tf dtype\".format(str(dtype)))\n\ndef is_callable(x):\n    \"\"\"Return `True` if :attr:`x` is callable.\n    \"\"\"\n    try:\n        _is_callable = callable(x)\n    except: # pylint: disable=bare-except\n        _is_callable = hasattr(x, '__call__')\n    return _is_callable\n\ndef is_str(x):\n    \"\"\"Returns `True` if :attr:`x` is either a str or unicode. Returns `False`\n    otherwise.\n    \"\"\"\n    return isinstance(x, six.string_types)\n\ndef is_placeholder(x):\n    \"\"\"Returns `True` if :attr:`x` is a :tf_main:`tf.placeholder <placeholder>`\n    or :tf_main:`tf.placeholder_with_default <placeholder_with_default>`.\n    \"\"\"\n    try:\n        return x.op.type in ['Placeholder', 'PlaceholderWithDefault']\n    except: # pylint: disable=bare-except\n        return False\n\ndef maybe_hparams_to_dict(hparams):\n    \"\"\"If :attr:`hparams` is an instance of :class:`~texar.HParams`,\n    converts it to a `dict` and returns. 
If :attr:`hparams` is a `dict`,\n    returns as is.\n    \"\"\"\n    if hparams is None:\n        return None\n    if isinstance(hparams, dict):\n        return hparams\n    return hparams.todict()\n\ndef _maybe_list_to_array(str_list, dtype_as):\n    if isinstance(dtype_as, (list, tuple)):\n        return type(dtype_as)(str_list)\n    elif isinstance(dtype_as, np.ndarray):\n        return np.array(str_list)\n    else:\n        return str_list\n\ndef compat_as_text(str_):\n    \"\"\"Converts strings into `unicode` (Python 2) or `str` (Python 3).\n\n    Args:\n        str\\_: A string or other data types convertible to string, or an\n            `n`-D numpy array or (possibly nested) list of such elements.\n\n    Returns:\n        The converted strings of the same structure/shape as :attr:`str_`.\n    \"\"\"\n    def _recur_convert(s):\n        if isinstance(s, (list, tuple, np.ndarray)):\n            s_ = [_recur_convert(si) for si in s]\n            return _maybe_list_to_array(s_, s)\n        else:\n            try:\n                return tf.compat.as_text(s)\n            except TypeError:\n                return tf.compat.as_text(str(s))\n\n    text = _recur_convert(str_)\n\n    return text\n"
  },
  {
    "path": "texar_repo/texar/utils/exceptions.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nTexar defined exceptions.\n\"\"\"\n\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\n__all__ = [\n    \"TexarError\"\n]\n\nclass TexarError(Exception):\n    \"\"\"\n    Texar error.\n    \"\"\"\n    pass\n\n"
  },
  {
    "path": "texar_repo/texar/utils/mode.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nUtility functions related to mode.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport tensorflow as tf\n\nfrom texar import context\n\n__all__ = [\n    \"maybe_global_mode\",\n    \"is_train_mode\",\n    \"is_eval_mode\",\n    \"is_predict_mode\",\n    \"is_train_mode_py\",\n    \"is_eval_mode_py\",\n    \"is_predict_mode_py\",\n    \"switch_dropout\"\n]\n\ndef maybe_global_mode(mode):\n    \"\"\"Returns :func:`texar.global_mode` if :attr:`mode` is `None`,\n    otherwise returns :attr:`mode` as-is.\n    \"\"\"\n    if mode is None:\n        return context.global_mode()\n    else:\n        return mode\n\ndef is_train_mode(mode):\n    \"\"\"Returns a bool Tensor indicating whether the global mode is TRAIN.\n    If :attr:`mode` is `None`, the mode is determined by\n    :func:`texar.global_mode`.\n    \"\"\"\n    if mode is None:\n        return context.global_mode_train()\n    else:\n        return tf.equal(mode, tf.estimator.ModeKeys.TRAIN)\n\ndef is_eval_mode(mode):\n    \"\"\"Returns a bool Tensor indicating whether the global mode is EVAL.\n    If :attr:`mode` is `None`, the mode is determined by\n    :func:`texar.global_mode`.\n    \"\"\"\n    if mode is None:\n        return context.global_mode_eval()\n    else:\n        return tf.equal(mode, 
tf.estimator.ModeKeys.EVAL)\n\ndef is_predict_mode(mode):\n    \"\"\"Returns a bool Tensor indicating whether the global mode is PREDICT.\n    If :attr:`mode` is `None`, the mode is determined by\n    :func:`texar.global_mode`.\n    \"\"\"\n    if mode is None:\n        return context.global_mode_predict()\n    else:\n        return tf.equal(mode, tf.estimator.ModeKeys.PREDICT)\n\ndef is_train_mode_py(mode, default=True):\n    \"\"\"Returns a python boolean indicating whether the mode is TRAIN.\n\n    Args:\n        mode: A string taking value in\n            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`.\n            Can be `None`.\n        default (bool): The return value when :attr:`mode` is `None`. Default\n            is `True`.\n\n    Returns:\n        A python boolean.\n    \"\"\"\n    if mode is None:\n        return default\n    if mode not in context.valid_modes():\n        raise ValueError('Unknown mode: {}'.format(mode))\n    return mode == tf.estimator.ModeKeys.TRAIN\n\ndef is_eval_mode_py(mode, default=False):\n    \"\"\"Returns a python boolean indicating whether the mode is EVAL.\n\n    Args:\n        mode: A string taking value in\n            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`.\n            Can be `None`.\n        default (bool): The return value when :attr:`mode` is `None`. Default\n            is `False`.\n\n    Returns:\n        A python boolean.\n    \"\"\"\n    if mode is None:\n        return default\n    if mode not in context.valid_modes():\n        raise ValueError('Unknown mode: {}'.format(mode))\n    return mode == tf.estimator.ModeKeys.EVAL\n\ndef is_predict_mode_py(mode, default=False):\n    \"\"\"Returns a python boolean indicating whether the mode is PREDICT.\n\n    Args:\n        mode: A string taking value in\n            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`.\n            Can be `None`.\n        default (bool): The return value when :attr:`mode` is `None`. 
Default\n            is `False`.\n\n    Returns:\n        A python boolean.\n    \"\"\"\n    if mode is None:\n        return default\n    if mode not in context.valid_modes():\n        raise ValueError('Unknown mode: {}'.format(mode))\n    return mode == tf.estimator.ModeKeys.PREDICT\n\ndef switch_dropout(dropout_keep_prob, mode=None):\n    \"\"\"Turns off dropout when not in training mode.\n\n    Args:\n        dropout_keep_prob: Dropout keep probability in training mode\n        mode (optional): A Tensor taking values of\n            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`.\n            Dropout is activated if :attr:`mode` is `TRAIN`.\n            If `None`, the mode is inferred from\n            :func:`texar.global_mode`.\n\n    Returns:\n        A unit Tensor that equals the dropout keep probability in `TRAIN` mode,\n        and `1.0` in other modes.\n    \"\"\"\n    return 1. - (1. - dropout_keep_prob) * tf.to_float(is_train_mode(mode))\n"
  },
  {
    "path": "texar_repo/texar/utils/mode_test.py",
    "content": "\n\"\"\"\nUnit tests for mode-related utility functions.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom texar.utils import mode\nfrom texar import context\n\n\nclass UtilsTest(tf.test.TestCase):\n    \"\"\"Tests utility functions.\n    \"\"\"\n\n    def test_mode(self):\n        \"\"\" Tests mode related utilities.\n        \"\"\"\n        training = mode.is_train_mode(None)\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            training_ = sess.run(training)\n            self.assertTrue(training_)\n\n            training_ = sess.run(\n                training,\n                feed_dict={context.global_mode(): tf.estimator.ModeKeys.TRAIN})\n            self.assertTrue(training_)\n\n            training_ = sess.run(\n                training,\n                feed_dict={context.global_mode(): tf.estimator.ModeKeys.EVAL})\n            self.assertFalse(training_)\n\n        training = mode.is_train_mode(tf.estimator.ModeKeys.TRAIN)\n        with self.test_session() as sess:\n            sess.run(tf.global_variables_initializer())\n            training_ = sess.run(training)\n            self.assertTrue(training_)\n\n\nif __name__ == \"__main__\":\n    tf.test.main()\n\n"
  },
  {
    "path": "texar_repo/texar/utils/shapes.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nUtility functions related to tensor shapes.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\n# pylint: disable=no-name-in-module, protected-access, no-member, invalid-name\n\nimport numpy as np\n\nimport tensorflow as tf\nfrom tensorflow.python.util import nest\nfrom tensorflow.python.ops import rnn\nfrom tensorflow.python.framework import ops\n\n__all__ = [\n    \"transpose_batch_time\",\n    \"get_batch_size\",\n    \"get_rank\",\n    \"mask_sequences\",\n    \"_mask_sequences_tensor\",\n    \"_mask_sequences_py\",\n    \"flatten\",\n    \"shape_list\",\n    \"pad_and_concat\"\n]\n\n\ndef transpose_batch_time(inputs):\n    \"\"\"Transposes inputs between time-major and batch-major.\n\n    Args:\n        inputs: A Tensor of shape `[batch_size, max_time, ...]` (batch-major)\n            or `[max_time, batch_size, ...]` (time-major), or a (possibly\n            nested) tuple of such elements.\n\n    Returns:\n        A (possibly nested tuple of) Tensor with transposed batch and\n        time dimensions of inputs.\n    \"\"\"\n    flat_input = nest.flatten(inputs)\n    flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]\n    # pylint: disable=protected-access\n    flat_input = [rnn._transpose_batch_time(input_) for input_ in flat_input]\n    
return nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input)\n\ndef get_batch_size(tensor):\n    \"\"\"Returns a unit `Tensor` representing the batch size, i.e.,\n    the size of the 1st dimension of :attr:`tensor`.\n    \"\"\"\n    return tf.shape(tensor)[0]\n\n\ndef get_rank(tensor):\n    \"\"\"Returns the tensor rank as a python `int`. The input tensor can also be\n    a python array.\n\n    Args:\n        tensor: A Tensor or python array.\n\n    Returns:\n        A python `int` representing the rank of :attr:`tensor`. Returns\n        `None` if the rank cannot be determined.\n    \"\"\"\n    if tf.contrib.framework.is_tensor(tensor):\n        shape = tensor.shape\n        try:\n            rank = len(shape.as_list())\n        except ValueError: # when `shape==TensorShape(None)`\n            rank = None\n    else:\n        array = np.asarray(tensor)\n        rank = array.ndim\n    return rank\n\ndef mask_sequences(sequence,\n                   sequence_length,\n                   dtype=None,\n                   time_major=False,\n                   tensor_rank=2):\n    \"\"\"Masks out sequence entries that are beyond the respective sequence\n    lengths. Masks along the time dimension.\n\n    :attr:`sequence` and :attr:`sequence_length` can either be python\n    arrays or Tensors, respectively. If both are python arrays (or None), the\n    return will be a python array as well.\n\n    Args:\n        sequence: A Tensor or python array of sequence values.\n            If `time_major==False` (default), this must be a Tensor of shape\n            `[batch_size, max_time, ...]`. The batch and time dimension is\n            exchanged if `time_major==True`.\n        sequence_length: A Tensor or python array of shape `[batch_size]`.\n            Time steps beyond the respective sequence lengths will be\n            made zero.\n        dtype (dtype): Type of :attr:`sequence`. 
If `None`, infer from\n            :attr:`sequence` automatically.\n        time_major (bool): The shape format of the inputs. If `True`,\n            :attr:`sequence` must have shape\n            `[max_time, batch_size, ...]`.\n            If `False` (default), :attr:`sequence` must have\n            shape `[batch_size, max_time, ...]`.\n        tensor_rank (int): The number of dimensions of :attr:`sequence`.\n            Default is 2, i.e., :attr:`sequence` is a 2D Tensor consisting\n            of batch and time dimensions. Ignored if both :attr:`sequence`\n            and :attr:`sequence_length` are python arrays.\n\n    Returns:\n        The masked sequence, i.e., a Tensor or python array of the same shape\n        as :attr:`sequence` but with masked-out entries (set to zero).\n\n        If both :attr:`sequence` and :attr:`sequence_length` are python\n        arrays, the returned value is a python array as well.\n    \"\"\"\n    is_tensor = tf.contrib.framework.is_tensor\n    if is_tensor(sequence) or is_tensor(sequence_length):\n        return _mask_sequences_tensor(\n            sequence, sequence_length, dtype, time_major, tensor_rank)\n    else:\n        return _mask_sequences_py(\n            sequence, sequence_length, dtype, time_major)\n\ndef _mask_sequences_tensor(sequence,\n                           sequence_length,\n                           dtype=None,\n                           time_major=False,\n                           tensor_rank=2):\n    \"\"\"Masks out sequence entries that are beyond the respective sequence\n    lengths. 
Masks along the time dimension.\n\n    Args:\n        sequence: A Tensor of sequence values.\n\n            If `time_major=False` (default), this must be a Tensor of shape:\n                `[batch_size, max_time, d_2, ..., d_rank]`, where the rank of\n                the Tensor is specified with :attr:`tensor_rank`.\n\n            If `time_major=True`, this must be a Tensor of shape:\n                `[max_time, batch_size, d_2, ..., d_rank].`\n        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond\n            the respective sequence lengths will be made zero.\n        dtype (dtype): Type of :attr:`sequence`. If `None`, infer from\n            :attr:`sequence` automatically.\n        time_major (bool): The shape format of the inputs. If `True`,\n            :attr:`sequence` must have shape\n            `[max_time, batch_size, d_2, ..., d_rank]`.\n            If `False` (default), :attr:`sequence` must have\n            shape `[batch_size, max_time, d_2, ..., d_rank]`.\n        tensor_rank (int): The number of dimensions of :attr:`sequence`.\n            Default is 2, i.e., :attr:`sequence` is a 2D Tensor consisting\n            of batch and time dimensions.\n\n    Returns:\n        The masked sequence, i.e., a Tensor of the same shape as\n        :attr:`sequence` but with masked-out entries (set to zero).\n    \"\"\"\n    if tensor_rank is None:\n        tensor_rank = 2\n    if tensor_rank < 2:\n        raise ValueError(\n            \"tensor_rank must be >= 2. 
Got tensor_rank = {}\".format(tensor_rank))\n    if time_major:\n        sequence = rnn._transpose_batch_time(sequence)\n    max_time = tf.to_int32(tf.shape(sequence)[1])\n    if dtype is None:\n        dtype = sequence.dtype\n    mask = tf.sequence_mask(\n        tf.to_int32(sequence_length), max_time, dtype=dtype)\n    for _ in range(2, tensor_rank):\n        mask = tf.expand_dims(mask, axis=-1)\n    sequence = sequence * mask\n    if time_major:\n        sequence = rnn._transpose_batch_time(sequence)\n    return sequence\n\ndef _mask_sequences_py(sequence,\n                       sequence_length,\n                       dtype=None,\n                       time_major=False):\n    \"\"\"Masks out sequence entries that are beyond the respective sequence\n    lengths. Masks along the time dimension.\n\n    This is the numpy version of :func:`texar.utils.mask_sequences`.\n\n    Args:\n        sequence: An python array of sequence values.\n\n            If `time_major=False` (default), this must be an array of shape:\n                `[batch_size, max_time, ...]`\n\n            If `time_major=True`, this must be a Tensor of shape:\n                `[max_time, batch_size, ...].`\n        sequence_length: An array of shape `[batch_size]`. Time steps beyond\n            the respective sequence lengths will be made zero.\n        dtype (dtype): Type of :attr:`sequence`. If `None`, infer from\n            :attr:`sequence` automatically.\n        time_major (bool): The shape format of the inputs. 
If `True`,\n            :attr:`sequence` must have shape\n            `[max_time, batch_size, ...]`.\n            If `False` (default), :attr:`sequence` must have\n            shape `[batch_size, max_time, ...]`.\n\n    Returns:\n        The masked sequence, i.e., an array of the same shape as\n        :attr:`sequence` but with masked-out entries (set to zero).\n    \"\"\"\n    sequence = np.array(sequence)\n    sequence_length = np.array(sequence_length)\n\n    rank = sequence.ndim\n    if rank < 2:\n        raise ValueError(\"`sequence` must be 2D or higher order.\")\n    batch_size = sequence.shape[0]\n    max_time = sequence.shape[1]\n    dtype = dtype or sequence.dtype\n\n    if time_major:\n        sequence = np.transpose(sequence, axes=[1, 0, 2])\n\n    steps = np.tile(np.arange(max_time), [batch_size, 1])\n    mask = np.asarray(steps < sequence_length[:, None], dtype=dtype)\n    for _ in range(2, rank):\n        mask = np.expand_dims(mask, -1)\n\n    sequence = sequence * mask\n\n    if time_major:\n        sequence = np.transpose(sequence, axes=[1, 0, 2])\n\n    return sequence\n\n\ndef flatten(tensor, preserve_dims, flattened_dim=None):\n    \"\"\"Flattens a tensor whiling keeping several leading dimensions.\n\n    :attr:`preserve_dims` must < tensor's rank\n\n    Args:\n        tensor: A Tensor to flatten.\n        preserve_dims (int): The number of leading dimensions to preserve.\n        flatterned_dim (int, optional): The size of the resulting flattened\n            dimension. If not given, infer automatically, which can cause\n            a statically unknown dimension size.\n\n    Returns:\n        A Tensor with rank :attr:`perserve_dims`+1.\n\n    Example:\n        .. 
code-block:: python\n\n            x = tf.ones(shape=[d_1, d_2, d_3, d_4])\n            y = flatten(x, 2) # y.shape == [d_1, d_2, d_3 * d_4]\n    \"\"\"\n    if flattened_dim is None:\n        flattened_dim = -1\n    shape = tf.concat([tf.shape(tensor)[:preserve_dims], [flattened_dim]],\n                      axis=0)\n    tensor_ = tf.reshape(tensor, shape)\n    return tensor_\n\ndef shape_list(x):\n    \"\"\"Returns **static** shape of the input Tensor whenever possible.\n\n    Args:\n        x: A Tensor.\n\n    Returns:\n        - If the rank of :attr:`x` is unknown, returns the dynamic shape: \\\n        `tf.shape(x)`\n        - Otherwise, returns a list of dims, each of which is either an `int` \\\n        whenever it can be statically determined, or a scalar Tensor otherwise.\n    \"\"\"\n    x = tf.convert_to_tensor(x)\n    # If unknown rank, return dynamic shape\n    if x.get_shape().dims is None:\n        return tf.shape(x)\n    static = x.get_shape().as_list()\n    shape = tf.shape(x)\n    ret = []\n    for i, dim in enumerate(static):\n        if dim is None:\n            dim = shape[i]\n        ret.append(dim)\n    return ret\n\ndef pad_and_concat(values, axis, rank=None, pad_axis=None,\n                   pad_constant_values=0):\n    \"\"\"Concats tensors along one dimension. Pads each of other dimensions of\n    the tensors to the corresponding maximum size if necessary.\n\n    Args:\n        values: A list of Tensors of the same rank.\n        axis (int): A Python int. Dimension along which to concatenate.\n        rank (int, optional): Rank of the tensors. If `None`, inferred\n            automatically from :attr:`values`.\n        pad_axis (int or list, optional): A Python int or a list of int.\n            Dimensions to pad. Paddings are only added to the end of\n            corresponding dimensions. If `None`, all dimensions except the\n            :attr:`axis` dimension are padded.\n        pad_constant_values: The scalar pad value to use. 
Must be same type\n            as the tensors.\n\n    Returns:\n        A `Tensor` resulting from padding and concatenation of the input\n        tensors.\n\n    Raises:\n        ValueError: If :attr:`rank` is `None` and cannot be inferred from\n            :attr:`values`.\n\n\n    Example:\n\n        .. code-block:: python\n\n            a = tf.ones([1, 2])\n            b = tf.ones([2, 3])\n\n            c = pad_and_concat([a,b], 0)\n            # c.shape == [3, 3]\n            # c == [[1, 1, 0],\n            #       [1, 1, 1],\n            #       [1, 1, 1]]\n\n            d = pad_and_concat([a,b], 1)\n            # d.shape == [2, 5]\n            # d == [[1, 1, 1, 1, 1]\n            #       [0, 0, 1, 1, 1]]\n    \"\"\"\n    if rank is None:\n        for value in values:\n            rank = get_rank(value)\n            if rank is not None:\n                break\n    if rank is None:\n        raise ValueError('Cannot determine the rank of the tensors')\n\n    def _pad_to_size(value, axis_, size):\n        \"\"\"Pads the :attr:`axis_` of a tensor :attr:`value` to the given\n        :attr:`size`. Only pads to the end.\n\n        Args:\n            value: A Tensor.\n            axis_: A Python int.\n            size: A scalar int Tensor or Python int.\n        \"\"\"\n        paddings = np.zeros([rank, 2], dtype=np.int32)\n        paddings[axis_, 1] = 1\n        paddings = paddings * (size - tf.shape(value)[axis_])\n        return tf.pad(value, paddings, mode='CONSTANT',\n                      constant_values=pad_constant_values)\n\n    if pad_axis is None:\n        pad_axis = [r for r in range(rank) if r != axis]\n\n    pad_axis = pad_axis if isinstance(pad_axis, (list, tuple)) else [pad_axis]\n\n    for pa in pad_axis:\n        max_dim_size = tf.reduce_max([tf.shape(v)[pa] for v in values])\n        for i, v in enumerate(values):\n            values[i] = _pad_to_size(v, pa, max_dim_size)\n\n    return tf.concat(values, axis)\n"
  },
  {
    "path": "texar_repo/texar/utils/shapes_test.py",
    "content": "\"\"\"\nUnit tests for shape-related utility functions.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=no-member\n\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.utils import shapes\n\nclass ShapesTest(tf.test.TestCase):\n    \"\"\"Tests shape-related utility functions.\n    \"\"\"\n\n    def test_mask_sequences(self):\n        \"\"\"Tests :func:`texar.utils.shapes.mask_sequences`.\n        \"\"\"\n        seq = np.ones([3, 4, 3], dtype=np.int32)\n        seq_length = np.array([3, 2, 1], dtype=np.int32)\n\n        masked_seq = shapes.mask_sequences(seq, seq_length)\n        self.assertEqual(masked_seq.shape, seq.shape)\n        seq_sum = np.sum(masked_seq, axis=(1, 2))\n        np.testing.assert_array_equal(seq_sum, seq_length * 3)\n\n    def test_pad_and_concat(self):\n        \"\"\"Test :func:`texar.utils.shapes.pad_and_concat`.\n        \"\"\"\n        a = tf.ones([3, 10, 2])\n        b = tf.ones([4, 20, 3])\n        c = tf.ones([5, 1, 4])\n\n        t = shapes.pad_and_concat([a, b, c], 0)\n        self.assertEqual(t.shape, [3+4+5, 20, 4])\n        t = shapes.pad_and_concat([a, b, c], 1)\n        self.assertEqual(t.shape, [5, 10+20+1, 4])\n        t = shapes.pad_and_concat([a, b, c], 2)\n        self.assertEqual(t.shape, [5, 20, 2+3+4])\n\n        d = tf.placeholder(dtype=tf.float32, shape=[6, None, 1])\n        t = shapes.pad_and_concat([a, b, c, d], 0)\n        with self.test_session() as sess:\n            t_ = sess.run(t, feed_dict={d: np.ones([6, 2, 1])})\n            self.assertEqual(list(t_.shape), [3+4+5+6, 20, 4])\n\nif __name__ == \"__main__\":\n    tf.test.main()\n\n"
  },
  {
    "path": "texar_repo/texar/utils/transformer_attentions.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Attentions specific to Transformer.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar import context\n\n# pylint: disable=too-many-arguments, invalid-name, no-member\n\n__all__ = [\n    'attention_bias_lower_triangle',\n    'attention_bias_ignore_padding',\n    'attention_bias_local',\n]\n\ndef attention_bias_lower_triangle(length, bias_value=-1e18):\n    \"\"\"Create an bias tensor to be added to attention logits.\n    Allows a query to attend to all positions up to and including its own.\n\n    Args:\n        length: a scalar.\n\n    Returns:\n        a `Tensor` with shape [1, 1, length, length].\n    \"\"\"\n    return attention_bias_local(length, -1, 0, bias_value)\n\ndef attention_bias_local(length, max_backward, max_forward, bias_value=-1e18):\n    \"\"\"Create an bias tensor to be added to attention logits.\n    A position may attend to positions at most max_distance from it,\n    forward and backwards.\n\n    This does not actually save any computation.\n\n    Args:\n        length: int\n        max_backward: int, maximum distance backward to attend. 
Negative\n        values indicate unlimited.\n        max_forward: int, maximum distance forward to attend. Negative\n        values indicate unlimited.\n\n    Returns:\n        a `Tensor` with shape [1, 1, length, length].\n        [batch_size, num_heads, queri_len, queri_len]\n    \"\"\"\n    band = _ones_matrix_band_part(\n        length,\n        length,\n        max_backward,\n        max_forward,\n        out_shape=[1, 1, length, length])\n    return bias_value * (1.0 - band)\n\ndef attention_bias_ignore_padding(memory_padding, bias_value=-1e18):\n    \"\"\"Create an bias tensor to be added to attention logits.\n\n    Args:\n        memory_padding: a float `Tensor` with shape [batch, memory_length].\n\n    Returns:\n        a `Tensor` with shape [batch, 1, 1, memory_length].\n        each dim corresponding to batch_size, num_heads, queries_len,\n        memory_length\n    \"\"\"\n    ret = memory_padding * bias_value\n    return tf.expand_dims(tf.expand_dims(ret, axis=1), axis=1)\n\ndef _ones_matrix_band_part(rows, cols, num_lower, num_upper,\n    out_shape=None):\n    \"\"\"Matrix band part of ones.\n    \"\"\"\n    if all([isinstance(el, int) for el in [rows, cols, num_lower,\n        num_upper]]):\n    # Needed info is constant, so we construct in numpy\n        if num_lower < 0:\n            num_lower = rows - 1\n        if num_upper < 0:\n            num_upper = cols - 1\n        lower_mask = np.tri(cols, rows, num_lower).T\n        upper_mask = np.tri(rows, cols, num_upper)\n        band = np.ones((rows, cols)) * lower_mask * upper_mask\n        if out_shape:\n            band = band.reshape(out_shape)\n        band = tf.constant(band, tf.float32)\n    else:\n        band = tf.matrix_band_part(tf.ones([rows, cols]),\n                                   tf.cast(num_lower, tf.int64),\n                                   tf.cast(num_upper, tf.int64))\n        if out_shape:\n            band = tf.reshape(band, out_shape)\n    return band\n"
  },
  {
    "path": "texar_repo/texar/utils/transformer_utils.py",
    "content": "# Copyright 2018 The Tensor2Tensor Authors.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#         http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n# Modifications copyright (C) 2018 Texar\n# ==============================================================================\n\"\"\"\nThis script is adapted from the tensor2tensor repository.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\nfrom __future__ import unicode_literals\n\nimport tensorflow as tf\n\n# pylint: disable=invalid-name, too-many-arguments, too-many-locals\n\nclass PadRemover(object):\n    \"\"\"Helper to remove padding from a tensor before sending to the experts.\n    The padding is computed for one reference tensor containing the padding mask\n    and then can be applied to any other tensor of shape [dim_origin,...].\n\n    Example::\n\n            input = [\n                [tok1, tok2],\n                [tok3, tok4],\n                [0, 0],\n                [0, 0],\n                [tok5, tok6],\n                [0, 0],\n            ]\n            output = [\n                [tok1, tok2],\n                [tok3, tok4],\n                [tok5, tok6],\n            ]\n    \"\"\"\n\n    def __init__(self, pad_mask):\n        \"\"\"Compute and store the location of the padding.\n\n        Args:\n            pad_mask (tf.Tensor): Reference padding tensor of shape\n                [batch_size,length] or [dim_origin]\n                
(dim_origin=batch_size*length)\n                containing non-zeros positive values to indicate padding\n                location.\n        \"\"\"\n        self.nonpad_ids = None\n        self.dim_origin = None\n\n        with tf.name_scope(\"pad_reduce/get_ids\"):\n            pad_mask = tf.reshape(pad_mask, [-1])    # Flatten the batch\n            # nonpad_ids contains coordinates of zeros rows (as pad_mask is\n            # float32, checking zero equality is done with |x| < epsilon, with\n            # epsilon=1e-9 as standard, here pad_mask only contains positive\n            # values so tf.abs would be redundant)\n            self.nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9))\n            self.dim_origin = tf.shape(pad_mask)[:1]\n\n    def remove(self, x):\n        \"\"\"Remove padding from the given tensor.\n\n        Args:\n            x: A Tensor of shape [dim_origin,...]\n\n        Returns:\n            A tensor of shape [dim_compressed,...] with dim_compressed\n            <= dim_origin\n        \"\"\"\n        with tf.name_scope(\"pad_reduce/remove\"):\n            x_shape = x.get_shape().as_list()\n            x = tf.gather_nd(\n                x,\n                indices=self.nonpad_ids,\n            )\n            #if not context.in_eager_mode():\n            # This is a hack but for some reason, gather_nd return a tensor of\n            # undefined shape, so the shape is set up manually\n            x.set_shape([None] + x_shape[1:])\n        return x\n\n    def restore(self, x):\n        \"\"\"Add padding back to the given tensor.\n\n        Args:\n            x: A Tensor of shape [dim_compressed,...]\n\n        Returns:\n            A tensor of shape [dim_origin,...] with\n            dim_compressed >= dim_origin. 
The\n            dim is restored from the original reference tensor\n        \"\"\"\n        with tf.name_scope(\"pad_reduce/restore\"):\n            x = tf.scatter_nd(\n                indices=self.nonpad_ids,\n                updates=x,\n                shape=tf.concat([self.dim_origin, tf.shape(x)[1:]], axis=0),\n            )\n        return x\n\ndef embedding_to_padding(emb):\n    \"\"\"Calculates the padding mask based on which embeddings are all zero.\n    We have hacked symbol_modality to return all-zero embeddings\n    for padding.\n\n    Args:\n        emb: a Tensor with shape [..., depth].\n\n    Returns:\n        a float Tensor with shape [...].\n    \"\"\"\n    emb_sum = tf.reduce_sum(tf.abs(emb), axis=-1)\n    return tf.to_float(tf.equal(emb_sum, 0.0))\n\ndef smoothing_cross_entropy(logits,\n                            labels,\n                            vocab_size,\n                            confidence,\n                            gaussian=False,\n                            zero_pad=True):\n    \"\"\"Cross entropy with label smoothing to limit over-confidence.\n\n    Args:\n        logits: Tensor of size [batch_size, ?, vocab_size]\n        labels: Tensor of size [batch_size, ?]\n        vocab_size: Tensor representing the size of the vocabulary.\n        confidence: Used to determine on and off values for label\n            smoothing. If `gaussian` is true, `confidence` is the\n            variance to the gaussian distribution.\n        gaussian: Uses a gaussian distribution for label smoothing\n        zero_pad: use 0 as the probability of the padding\n            in the smoothed labels. 
By setting this, we replicate the\n            numeric calculation of tensor2tensor, which doesn't set the\n            <BOS> token in the vocabulary.\n\n    Returns:\n        A float scalar Tensor containing the cross entropy loss.\n    \"\"\"\n    with tf.name_scope(\"smoothing_cross_entropy\", values=[logits, labels]):\n        # Low confidence is given to all non-true labels, uniformly.\n        if zero_pad:\n            low_confidence = (1.0 - confidence) / tf.to_float(\n                vocab_size - 2)\n        else:\n            low_confidence = (1.0 - confidence) / tf.to_float(\n                vocab_size - 1)\n\n        if gaussian and confidence > 0.0:\n            labels = tf.cast(labels, tf.float32)\n            normal_dist = tf.distributions.Normal(loc=labels,\n                                                  scale=confidence)\n            soft_targets = normal_dist.prob(\n                tf.cast(tf.range(vocab_size), tf.float32)\\\n                    [:, None, None])\n            # Reordering soft_targets from [vocab_size, batch_size, ?]\n            # to match logits: [batch_size, ?, vocab_size]\n            soft_targets = tf.transpose(soft_targets, perm=[1, 2, 0])\n        else:\n            soft_targets = tf.one_hot(\n                tf.cast(labels, tf.int32),\n                depth=vocab_size,\n                on_value=confidence,\n                off_value=low_confidence,\n                dtype=logits.dtype)\n        if zero_pad:\n            soft_targets = tf.concat([tf.expand_dims(\\\n                tf.zeros_like(labels, dtype=tf.float32), 2),\\\n                soft_targets[:, :, 1:]], -1)\n\n        if hasattr(tf.nn, 'softmax_cross_entropy_with_logits_v2'):\n            cross_entropy_fn = tf.nn.softmax_cross_entropy_with_logits_v2\n        else:\n            cross_entropy_fn = tf.nn.softmax_cross_entropy_with_logits\n\n    return cross_entropy_fn(\n        logits=logits, labels=tf.stop_gradient(soft_targets))\n\n"
  },
  {
    "path": "texar_repo/texar/utils/utils.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nMiscellaneous Utility functions.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\n# pylint: disable=invalid-name, no-member, no-name-in-module, protected-access\n# pylint: disable=redefined-outer-name, too-many-arguments\n\nimport inspect\nimport funcsigs\nfrom pydoc import locate\nimport copy\nimport collections\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.hyperparams import HParams\nfrom texar.utils.dtypes import is_str, is_callable, compat_as_text, \\\n        _maybe_list_to_array\n\n# pylint: disable=anomalous-backslash-in-string\n\nMAX_SEQ_LENGTH = np.iinfo(np.int32).max\n\n## Some modules cannot be imported directly,\n## e.g., `import tensorflow.train` fails.\n## Such modules are treated in a special way in utils like `get_class` as below.\n#_unimportable_modules = {\n#    'tensorflow.train', 'tensorflow.keras.regularizers'\n#}\n\n__all__ = [\n    \"_inspect_getargspec\",\n    \"get_args\",\n    \"get_default_arg_values\",\n    \"check_or_get_class\",\n    \"get_class\",\n    \"check_or_get_instance\",\n    \"get_instance\",\n    \"check_or_get_instance_with_redundant_kwargs\",\n    \"get_instance_with_redundant_kwargs\",\n    \"get_function\",\n    \"call_function_with_redundant_kwargs\",\n    \"get_instance_kwargs\",\n    
\"dict_patch\",\n    \"dict_lookup\",\n    \"dict_fetch\",\n    \"dict_pop\",\n    \"flatten_dict\",\n    \"strip_token\",\n    \"strip_eos\",\n    \"strip_bos\",\n    \"strip_special_tokens\",\n    \"str_join\",\n    \"map_ids_to_strs\",\n    \"default_str\",\n    \"uniquify_str\",\n    \"ceildiv\",\n    \"straight_through\"\n]\n\n\n# TODO(zhiting): complete this\ndef _expand_name(name):\n    \"\"\"Replaces common shorthands with respective full names.\n\n        \"tf.xxx\" --> \"tensorflow.xxx\"\n        \"tx.xxx\" --> \"texar.xxx\"\n    \"\"\"\n    return name\n\ndef _inspect_getargspec(fn):\n    \"\"\"Returns `inspect.getargspec(fn)` for Py2 and `inspect.getfullargspec(fn)`\n    for Py3\n    \"\"\"\n    try:\n        return inspect.getfullargspec(fn)\n    except AttributeError:\n        return inspect.getargspec(fn)\n\ndef get_args(fn):\n    \"\"\"Gets the arguments of a function.\n\n    Args:\n        fn (callable): The function to inspect.\n\n    Returns:\n        list: A list of argument names (str) of the function.\n    \"\"\"\n    argspec = _inspect_getargspec(fn)\n    args = argspec.args\n\n    # Empty args can be because `fn` is decorated. Use `funcsigs.signature`\n    # to re-do the inspect\n    if len(args) == 0:\n        args = funcsigs.signature(fn).parameters.keys()\n        args = list(args)\n\n    return args\n\ndef get_default_arg_values(fn):\n    \"\"\"Gets the arguments and respective default values of a function.\n\n    Only arguments with default values are included in the output dictionary.\n\n    Args:\n        fn (callable): The function to inspect.\n\n    Returns:\n        dict: A dictionary that maps argument names (str) to their default\n        values. 
The dictionary is empty if no arguments have default values.\n    \"\"\"\n    argspec = _inspect_getargspec(fn)\n    if argspec.defaults is None:\n        return {}\n    num_defaults = len(argspec.defaults)\n    return dict(zip(argspec.args[-num_defaults:], argspec.defaults))\n\n\ndef check_or_get_class(class_or_name, module_path=None, superclass=None):\n    \"\"\"Returns the class and checks if the class inherits :attr:`superclass`.\n\n    Args:\n        class_or_name: Name or full path to the class, or the class itself.\n        module_paths (list, optional): Paths to candidate modules to search\n            for the class. This is used if :attr:`class_or_name` is a string and\n            the class cannot be located solely based on :attr:`class_or_name`.\n            The first module in the list that contains the class\n            is used.\n        superclass (optional): A (list of) classes that the target class\n            must inherit.\n\n    Returns:\n        The target class.\n\n    Raises:\n        ValueError: If class is not found based on :attr:`class_or_name` and\n            :attr:`module_paths`.\n        TypeError: If class does not inherits :attr:`superclass`.\n    \"\"\"\n    class_ = class_or_name\n    if is_str(class_):\n        class_ = get_class(class_, module_path)\n    if superclass is not None:\n        if not issubclass(class_, superclass):\n            raise TypeError(\n                \"A subclass of {} is expected. Got: {}\".format(\n                    superclass, class_))\n    return class_\n\ndef get_class(class_name, module_paths=None):\n    \"\"\"Returns the class based on class name.\n\n    Args:\n        class_name (str): Name or full path to the class.\n        module_paths (list): Paths to candidate modules to search for the\n            class. This is used if the class cannot be located solely based on\n            `class_name`. 
The first module in the list that contains the class\n            is used.\n\n    Returns:\n        The target class.\n\n    Raises:\n        ValueError: If class is not found based on :attr:`class_name` and\n            :attr:`module_paths`.\n    \"\"\"\n    class_ = locate(class_name)\n    if (class_ is None) and (module_paths is not None):\n        for module_path in module_paths:\n            #if module_path in _unimportable_modules:\n            # Special treatment for unimportable modules by directly\n            # accessing the class\n            class_ = locate('.'.join([module_path, class_name]))\n            if class_ is not None:\n                break\n            #else:\n            #    module = importlib.import_module(module_path)\n            #    if class_name in dir(module):\n            #        class_ = getattr(module, class_name)\n            #        break\n\n    if class_ is None:\n        raise ValueError(\n            \"Class not found in {}: {}\".format(module_paths, class_name))\n\n    return class_\n\ndef check_or_get_instance(ins_or_class_or_name, kwargs, module_paths=None,\n                          classtype=None):\n    \"\"\"Returns a class instance and checks types.\n\n    Args:\n        ins_or_class_or_name: Can be of 3 types:\n\n            - A class to instantiate.\n            - A string of the name or full path to a class to \\\n              instantiate.\n            - The class instance to check types.\n\n        kwargs (dict): Keyword arguments for the class constructor. Ignored\n            if `ins_or_class_or_name` is a class instance.\n        module_paths (list, optional): Paths to candidate modules to\n            search for the class. This is used if the class cannot be\n            located solely based on :attr:`class_name`. 
The first module\n            in the list that contains the class is used.\n        classtype (optional): A (list of) class of which the instance must\n            be an instantiation.\n\n    Raises:\n        ValueError: If class is not found based on :attr:`class_name` and\n            :attr:`module_paths`.\n        ValueError: If :attr:`kwargs` contains arguments that are invalid\n            for the class construction.\n        TypeError: If the instance is not an instantiation of\n            :attr:`classtype`.\n    \"\"\"\n    ret = ins_or_class_or_name\n    if is_str(ret) or isinstance(ret, type):\n        ret = get_instance(ret, kwargs, module_paths)\n    if classtype is not None:\n        if not isinstance(ret, classtype):\n            raise TypeError(\n                \"An instance of {} is expected. Got: {}\".format(classtype, ret))\n    return ret\n\ndef get_instance(class_or_name, kwargs, module_paths=None):\n    \"\"\"Creates a class instance.\n\n    Args:\n        class_or_name: A class, or its name or full path to a class to\n            instantiate.\n        kwargs (dict): Keyword arguments for the class constructor.\n        module_paths (list, optional): Paths to candidate modules to\n            search for the class. This is used if the class cannot be\n            located solely based on :attr:`class_name`. 
The first module\n            in the list that contains the class is used.\n\n    Returns:\n        A class instance.\n\n    Raises:\n        ValueError: If class is not found based on :attr:`class_or_name` and\n            :attr:`module_paths`.\n        ValueError: If :attr:`kwargs` contains arguments that are invalid\n            for the class construction.\n    \"\"\"\n    # Locate the class\n    class_ = class_or_name\n    if is_str(class_):\n        class_ = get_class(class_, module_paths)\n\n    # Check validity of arguments\n    class_args = set(get_args(class_.__init__))\n\n    if kwargs is None:\n        kwargs = {}\n    for key in kwargs.keys():\n        if key not in class_args:\n            raise ValueError(\n                \"Invalid argument for class %s.%s: %s, valid args: %s\" %\n                (class_.__module__, class_.__name__, key, list(class_args)))\n\n    return class_(**kwargs)\n\ndef check_or_get_instance_with_redundant_kwargs(\n        ins_or_class_or_name, kwargs, module_paths=None, classtype=None):\n    \"\"\"Returns a class instance and checks types.\n\n    Only those keyword arguments in :attr:`kwargs` that are included in the\n    class construction method are used.\n\n    Args:\n        ins_or_class_or_name: Can be of 3 types:\n\n            - A class to instantiate.\n            - A string of the name or module path to a class to \\\n              instantiate.\n            - The class instance to check types.\n\n        kwargs (dict): Keyword arguments for the class constructor.\n        module_paths (list, optional): Paths to candidate modules to\n            search for the class. This is used if the class cannot be\n            located solely based on :attr:`class_name`. 
The first module\n            in the list that contains the class is used.\n        classtype (optional): A (list of) classes of which the instance must\n            be an instantiation.\n\n    Raises:\n        ValueError: If class is not found based on :attr:`class_name` and\n            :attr:`module_paths`.\n        ValueError: If :attr:`kwargs` contains arguments that are invalid\n            for the class construction.\n        TypeError: If the instance is not an instantiation of\n            :attr:`classtype`.\n    \"\"\"\n    ret = ins_or_class_or_name\n    if is_str(ret) or isinstance(ret, type):\n        ret = get_instance_with_redundant_kwargs(ret, kwargs, module_paths)\n    if classtype is not None:\n        if not isinstance(ret, classtype):\n            raise TypeError(\n                \"An instance of {} is expected. Got: {}\".format(classtype, ret))\n    return ret\n\ndef get_instance_with_redundant_kwargs(\n        class_name, kwargs, module_paths=None):\n    \"\"\"Creates a class instance.\n\n    Only those keyword arguments in :attr:`kwargs` that are included in the\n    class construction method are used.\n\n    Args:\n        class_name (str): A class or its name or module path.\n        kwargs (dict): A dictionary of arguments for the class constructor. It\n            may include invalid arguments which will be ignored.\n        module_paths (list of str): A list of paths to candidate modules to\n            search for the class. This is used if the class cannot be located\n            solely based on :attr:`class_name`. 
The first module in the list\n            that contains the class is used.\n\n    Returns:\n        A class instance.\n\n    Raises:\n        ValueError: If class is not found based on :attr:`class_name` and\n            :attr:`module_paths`.\n    \"\"\"\n    # Locate the class\n    class_ = get_class(class_name, module_paths)\n\n    # Select valid arguments\n    selected_kwargs = {}\n    class_args = set(get_args(class_.__init__))\n    if kwargs is None:\n        kwargs = {}\n    for key, value in kwargs.items():\n        if key in class_args:\n            selected_kwargs[key] = value\n\n    return class_(**selected_kwargs)\n\ndef get_function(fn_or_name, module_paths=None):\n    \"\"\"Returns the function of specified name and module.\n\n    Args:\n        fn_or_name (str or callable): Name or full path to a function, or the\n            function itself.\n        module_paths (list, optional): A list of paths to candidate modules to\n            search for the function. This is used only when the function\n            cannot be located solely based on :attr:`fn_or_name`. 
The first\n            module in the list that contains the function is used.\n\n    Returns:\n        A function.\n    \"\"\"\n    if is_callable(fn_or_name):\n        return fn_or_name\n\n    fn = locate(fn_or_name)\n    if (fn is None) and (module_paths is not None):\n        for module_path in module_paths:\n            #if module_path in _unimportable_modules:\n            fn = locate('.'.join([module_path, fn_or_name]))\n            if fn is not None:\n                break\n            #module = importlib.import_module(module_path)\n            #if fn_name in dir(module):\n            #    fn = getattr(module, fn_name)\n            #    break\n\n    if fn is None:\n        raise ValueError(\n            \"Method not found in {}: {}\".format(module_paths, fn_or_name))\n\n    return fn\n\n\ndef call_function_with_redundant_kwargs(fn, kwargs):\n    \"\"\"Calls a function and returns the results.\n\n    Only those keyword arguments in :attr:`kwargs` that are included in the\n    function's argument list are used to call the function.\n\n    Args:\n        fn (function): A callable. If :attr:`fn` is not a python function,\n            :attr:`fn.__call__` is called.\n        kwargs (dict): A `dict` of arguments for the callable. 
It\n            may include invalid arguments which will be ignored.\n\n    Returns:\n        The returned results by calling :attr:`fn`.\n    \"\"\"\n    try:\n        fn_args = set(get_args(fn))\n    except TypeError:\n        fn_args = set(get_args(fn.__call__))\n\n    if kwargs is None:\n        kwargs = {}\n\n    # Select valid arguments\n    selected_kwargs = {}\n    for key, value in kwargs.items():\n        if key in fn_args:\n            selected_kwargs[key] = value\n\n    return fn(**selected_kwargs)\n\n\ndef get_instance_kwargs(kwargs, hparams):\n    \"\"\"Makes a dict of keyword arguments with the following structure:\n\n    `kwargs_ = {'hparams': dict(hparams), **kwargs}`.\n\n    This is typically used for constructing a module which takes a set of\n    arguments as well as a argument named `hparams`.\n\n    Args:\n        kwargs (dict): A dict of keyword arguments. Can be `None`.\n        hparams: A dict or an instance of :class:`~texar.HParams` Can be `None`.\n\n    Returns:\n        A `dict` that contains the keyword arguments in :attr:`kwargs`, and\n        an additional keyword argument named `hparams`.\n    \"\"\"\n    if hparams is None or isinstance(hparams, dict):\n        kwargs_ = {'hparams': hparams}\n    elif isinstance(hparams, HParams):\n        kwargs_ = {'hparams': hparams.todict()}\n    else:\n        raise ValueError(\n            '`hparams` must be a dict, an instance of HParams, or a `None`.')\n    kwargs_.update(kwargs or {})\n    return kwargs_\n\ndef dict_patch(tgt_dict, src_dict):\n    \"\"\"Recursively patch :attr:`tgt_dict` by adding items from :attr:`src_dict`\n    that do not exist in :attr:`tgt_dict`.\n\n    If respective items in :attr:`src_dict` and :attr:`tgt_dict` are both\n    `dict`, the :attr:`tgt_dict` item is patched recursively.\n\n    Args:\n        tgt_dict (dict): Target dictionary to patch.\n        src_dict (dict): Source dictionary.\n\n    Return:\n        dict: The new :attr:`tgt_dict` that is patched.\n    
\"\"\"\n    if src_dict is None:\n        return tgt_dict\n\n    for key, value in src_dict.items():\n        if key not in tgt_dict:\n            tgt_dict[key] = copy.deepcopy(value)\n        elif isinstance(value, dict) and isinstance(tgt_dict[key], dict):\n            tgt_dict[key] = dict_patch(tgt_dict[key], value)\n    return tgt_dict\n\ndef dict_lookup(dict_, keys, default=None):\n    \"\"\"Looks up :attr:`keys` in the dict, returns the corresponding values.\n\n    The :attr:`default` is used for keys not present in the dict.\n\n    Args:\n        dict_ (dict): A dictionary for lookup.\n        keys: A numpy array or a (possibly nested) list of keys.\n        default (optional): Value to be returned when a key is not in\n            :attr:`dict_`. Error is raised if :attr:`default` is not given and\n            key is not in the dict.\n\n    Returns:\n        A numpy array of values with the same structure as :attr:`keys`.\n\n    Raises:\n        TypeError: If key is not in :attr:`dict_` and :attr:`default` is `None`.\n    \"\"\"\n    return np.vectorize(lambda x: dict_.get(x, default))(keys)\n\ndef dict_fetch(src_dict, tgt_dict_or_keys):\n    \"\"\"Fetches a sub dict of :attr:`src_dict` with the keys in\n    :attr:`tgt_dict_or_keys`.\n\n    Args:\n        src_dict: A dict or instance of :class:`~texar.HParams`.\n            The source dict to fetch values from.\n        tgt_dict_or_keys: A dict, instance of :class:`~texar.HParams`,\n            or a list (or a dict_keys) of keys to be included in the output\n            dict.\n\n    Returns:\n        A new dict that is a subdict of :attr:`src_dict`.\n    \"\"\"\n    if src_dict is None:\n        return src_dict\n\n    if isinstance(tgt_dict_or_keys, HParams):\n        tgt_dict_or_keys = tgt_dict_or_keys.todict()\n    if isinstance(tgt_dict_or_keys, dict):\n        tgt_dict_or_keys = tgt_dict_or_keys.keys()\n    keys = list(tgt_dict_or_keys)\n\n    if isinstance(src_dict, HParams):\n        src_dict = 
src_dict.todict()\n\n    return {k: src_dict[k] for k in keys if k in src_dict}\n\ndef dict_pop(dict_, pop_keys, default=None):\n    \"\"\"Removes keys from a dict and returns their values.\n\n    Args:\n        dict_ (dict): A dictionary from which items are removed.\n        pop_keys: A key or a list of keys to remove and return respective\n            values or :attr:`default`.\n        default (optional): Value to be returned when a key is not in\n            :attr:`dict_`. The default value is `None`.\n\n    Returns:\n        A `dict` of the items removed from :attr:`dict_`.\n    \"\"\"\n    if not isinstance(pop_keys, (list, tuple)):\n        pop_keys = [pop_keys]\n    ret_dict = {key: dict_.pop(key, default) for key in pop_keys}\n    return ret_dict\n\ndef flatten_dict(dict_, parent_key=\"\", sep=\".\"):\n    \"\"\"Flattens a nested dictionary. Namedtuples within the dictionary are\n    converted to dicts.\n\n    Adapted from:\n    https://github.com/google/seq2seq/blob/master/seq2seq/models/model_base.py\n\n    Args:\n        dict_ (dict): The dictionary to flatten.\n        parent_key (str): A prefix to prepend to each key.\n        sep (str): Separator that intervenes between parent and child keys.\n            E.g., if `sep` == '.', then `{ \"a\": { \"b\": 3 } }` is converted\n            into `{ \"a.b\": 3 }`.\n\n    Returns:\n        A new flattened `dict`.\n    \"\"\"\n    items = []\n    for key, value in dict_.items():\n        key_ = parent_key + sep + key if parent_key else key\n        if isinstance(value, collections.MutableMapping):\n            items.extend(flatten_dict(value, key_, sep=sep).items())\n        elif isinstance(value, tuple) and hasattr(value, \"_asdict\"):\n            dict_items = collections.OrderedDict(zip(value._fields, value))\n            items.extend(flatten_dict(dict_items, key_, sep=sep).items())\n        else:\n            items.append((key_, value))\n    return dict(items)\n\ndef default_str(str_, default_str):\n    
\"\"\"Returns :attr:`str_` if it is not `None` or empty, otherwise returns\n    :attr:`default_str`.\n\n    Args:\n        str_: A string.\n        default_str: A string.\n\n    Returns:\n        Either :attr:`str_` or :attr:`default_str`.\n    \"\"\"\n    if str_ is not None and str_ != \"\":\n        return str_\n    else:\n        return default_str\n\ndef uniquify_str(str_, str_set):\n    \"\"\"Uniquifies :attr:`str_` if :attr:`str_` is included in :attr:`str_set`.\n\n    This is done by appending a number to :attr:`str_`. Returns\n    :attr:`str_` directly if it is not included in :attr:`str_set`.\n\n    Args:\n        str_ (string): A string to uniquify.\n        str_set (set, dict, or list): A collection of strings. The returned\n            string is guaranteed to be different from the elements in the\n            collection.\n\n    Returns:\n        The uniquified string. Returns :attr:`str_` directly if it is\n        already unique.\n\n    Example:\n\n        .. code-block:: python\n\n            print(uniquify_str('name', ['name', 'name_1']))\n            # 'name_2'\n\n    \"\"\"\n    if str_ not in str_set:\n        return str_\n    else:\n        for i in range(1, len(str_set)+1):\n            unique_str = str_ + \"_%d\" % i\n            if unique_str not in str_set:\n                return unique_str\n    raise ValueError(\"Fails to uniquify string: \" + str_)\n\n\ndef _recur_split(s, dtype_as):\n    \"\"\"Splits (possibly nested list of) strings recursively.\n    \"\"\"\n    if is_str(s):\n        return _maybe_list_to_array(s.split(), dtype_as)\n    else:\n        s_ = [_recur_split(si, dtype_as) for si in s]\n        return _maybe_list_to_array(s_, s)\n\n\ndef strip_token(str_, token, is_token_list=False, compat=True):\n    \"\"\"Returns a copy of strings with leading and trailing tokens removed.\n\n    Note that besides :attr:`token`, all leading and trailing whitespace\n    characters are also removed.\n\n    If :attr:`is_token_list` is False, 
then the function assumes tokens in\n    :attr:`str_` are separated with whitespace character.\n\n    Args:\n        str\\_: A `str`, or an `n`-D numpy array or (possibly nested)\n            list of `str`.\n        token (str): The token to strip, e.g., the '<PAD>' token defined in\n            :class:`~texar.data.SpecialTokens`.PAD\n        is_token_list (bool): Whether each sentence in :attr:`str_` is a list\n            of tokens. If False, each sentence in :attr:`str_` is assumed to\n            contain tokens separated with space character.\n        compat (bool): Whether to convert tokens into `unicode` (Python 2)\n            or `str` (Python 3).\n\n    Returns:\n        The stripped strings of the same structure/shape as :attr:`str_`.\n\n    Example:\n\n        .. code-block:: python\n\n            str_ = '<PAD> a sentence <PAD> <PAD>  '\n            str_stripped = strip_token(str_, '<PAD>')\n            # str_stripped == 'a sentence'\n\n            str_ = ['<PAD>', 'a', 'sentence', '<PAD>', '<PAD>', '', '']\n            str_stripped = strip_token(str_, '<PAD>', is_token_list=True)\n            # str_stripped == 'a sentence'\n    \"\"\"\n    def _recur_strip(s):\n        if is_str(s):\n            if token == \"\":\n                return ' '.join(s.strip().split())\n            else:\n                return ' '.join(s.strip().split()).\\\n                    replace(' '+token, '').replace(token+' ', '')\n        else:\n            s_ = [_recur_strip(si) for si in s]\n            return _maybe_list_to_array(s_, s)\n\n    s = str_\n\n    if compat:\n        s = compat_as_text(s)\n\n    if is_token_list:\n        s = str_join(s, compat=False)\n\n    strp_str = _recur_strip(s)\n\n    if is_token_list:\n        strp_str = _recur_split(strp_str, str_)\n\n    return strp_str\n\ndef strip_eos(str_, eos_token='<EOS>', is_token_list=False, compat=True):\n    \"\"\"Remove the EOS token and all subsequent tokens.\n\n    If :attr:`is_token_list` is False, then the 
function assumes tokens in\n    :attr:`str_` are separated with whitespace character.\n\n    Args:\n        str\\_: A `str`, or an `n`-D numpy array or (possibly nested)\n            list of `str`.\n        eos_token (str): The EOS token. Default is '<EOS>' as defined in\n            :class:`~texar.data.SpecialTokens`.EOS\n        is_token_list (bool): Whether each sentence in :attr:`str_` is a list\n            of tokens. If False, each sentence in :attr:`str_` is assumed to\n            contain tokens separated with space character.\n        compat (bool): Whether to convert tokens into `unicode` (Python 2)\n            or `str` (Python 3).\n\n    Returns:\n        Strings of the same structure/shape as :attr:`str_`.\n    \"\"\"\n    def _recur_strip(s):\n        if is_str(s):\n            s_tokens = s.split()\n            if eos_token in s_tokens:\n                return ' '.join(s_tokens[:s_tokens.index(eos_token)])\n            else:\n                return s\n        else:\n            s_ = [_recur_strip(si) for si in s]\n            return _maybe_list_to_array(s_, s)\n\n    s = str_\n\n    if compat:\n        s = compat_as_text(s)\n\n    if is_token_list:\n        s = str_join(s, compat=False)\n\n    strp_str = _recur_strip(s)\n\n    if is_token_list:\n        strp_str = _recur_split(strp_str, str_)\n\n    return strp_str\n_strip_eos_ = strip_eos\n\ndef strip_bos(str_, bos_token='<BOS>', is_token_list=False, compat=True):\n    \"\"\"Remove all leading BOS tokens.\n\n    Note that besides :attr:`bos_token`, all leading and trailing whitespace\n    characters are also removed.\n\n    If :attr:`is_token_list` is False, then the function assumes tokens in\n    :attr:`str_` are separated with whitespace character.\n\n    Args:\n        str\\_: A `str`, or an `n`-D numpy array or (possibly nested)\n            list of `str`.\n        bos_token (str): The BOS token. 
Default is '<BOS>' as defined in\n            :class:`~texar.data.SpecialTokens`.BOS\n        is_token_list (bool): Whether each sentence in :attr:`str_` is a list\n            of tokens. If False, each sentence in :attr:`str_` is assumed to\n            contain tokens separated with space character.\n        compat (bool): Whether to convert tokens into `unicode` (Python 2)\n            or `str` (Python 3).\n\n    Returns:\n        Strings of the same structure/shape as :attr:`str_`.\n    \"\"\"\n    def _recur_strip(s):\n        if is_str(s):\n            if bos_token == '':\n                return ' '.join(s.strip().split())\n            else:\n                return ' '.join(s.strip().split()).replace(bos_token+' ', '')\n        else:\n            s_ = [_recur_strip(si) for si in s]\n            return _maybe_list_to_array(s_, s)\n\n    s = str_\n\n    if compat:\n        s = compat_as_text(s)\n\n    if is_token_list:\n        s = str_join(s, compat=False)\n\n    strp_str = _recur_strip(s)\n\n    if is_token_list:\n        strp_str = _recur_split(strp_str, str_)\n\n    return strp_str\n_strip_bos_ = strip_bos\n\ndef strip_special_tokens(str_, strip_pad='<PAD>', strip_bos='<BOS>',\n                         strip_eos='<EOS>', is_token_list=False, compat=True):\n    \"\"\"Removes special tokens in strings, including:\n\n        - Removes EOS and all subsequent tokens\n        - Removes leading and and trailing PAD tokens\n        - Removes leading BOS tokens\n\n    Note that besides the special tokens, all leading and trailing whitespace\n    characters are also removed.\n\n    This is a joint function of :func:`strip_eos`, :func:`strip_pad`, and\n    :func:`strip_bos`\n\n    Args:\n        str\\_: A `str`, or an `n`-D numpy array or (possibly nested)\n            list of `str`.\n        strip_pad (str): The PAD token to strip from the strings (i.e., remove\n            the leading and trailing PAD tokens of the strings). 
Default\n            is '<PAD>' as defined in\n            :class:`~texar.data.SpecialTokens`.PAD.\n            Set to `None` or `False` to disable the stripping.\n        strip_bos (str): The BOS token to strip from the strings (i.e., remove\n            the leading BOS tokens of the strings).\n            Default is '<BOS>' as defined in\n            :class:`~texar.data.SpecialTokens`.BOS.\n            Set to `None` or `False` to disable the stripping.\n        strip_eos (str): The EOS token to strip from the strings (i.e., remove\n            the EOS tokens and all subsequent tokens of the strings).\n            Default is '<EOS>' as defined in\n            :class:`~texar.data.SpecialTokens`.EOS.\n            Set to `None` or `False` to disable the stripping.\n        is_token_list (bool): Whether each sentence in :attr:`str_` is a list\n            of tokens. If False, each sentence in :attr:`str_` is assumed to\n            contain tokens separated with space character.\n        compat (bool): Whether to convert tokens into `unicode` (Python 2)\n            or `str` (Python 3).\n\n    Returns:\n        Strings of the same shape of :attr:`str_` with special tokens stripped.\n    \"\"\"\n    s = str_\n\n    if compat:\n        s = compat_as_text(s)\n\n    if is_token_list:\n        s = str_join(s, compat=False)\n\n    if strip_eos is not None and strip_eos is not False:\n        s = _strip_eos_(s, strip_eos, is_token_list=False, compat=False)\n\n    if strip_pad is not None and strip_pad is not False:\n        s = strip_token(s, strip_pad, is_token_list=False, compat=False)\n\n    if strip_bos is not None and strip_bos is not False:\n        s = _strip_bos_(s, strip_bos, is_token_list=False, compat=False)\n\n    if is_token_list:\n        s = _recur_split(s, str_)\n\n    return s\n\ndef str_join(tokens, sep=' ', compat=True):\n    \"\"\"Concats :attr:`tokens` along the last dimension with intervening\n    occurrences of :attr:`sep`.\n\n    Args:\n        tokens: 
An `n`-D numpy array or (possibly nested) list of `str`.\n        sep (str): The string intervening between the tokens.\n        compat (bool): Whether to convert tokens into `unicode` (Python 2)\n            or `str` (Python 3).\n\n    Returns:\n        An `(n-1)`-D numpy array (or list) of `str`.\n    \"\"\"\n    def _recur_join(s):\n        if len(s) == 0:\n            return ''\n        elif is_str(s[0]):\n            return sep.join(s)\n        else:\n            s_ = [_recur_join(si) for si in s]\n            return _maybe_list_to_array(s_, s)\n\n    if compat:\n        tokens = compat_as_text(tokens)\n\n    str_ = _recur_join(tokens)\n\n    return str_\n\ndef map_ids_to_strs(ids, vocab, join=True, strip_pad='<PAD>',\n                    strip_bos='<BOS>', strip_eos='<EOS>', compat=True):\n    \"\"\"Transforms `int` indexes to strings by mapping ids to tokens,\n    concatenating tokens into sentences, and stripping special tokens, etc.\n\n    Args:\n        ids: An n-D numpy array or (possibly nested) list of `int` indexes.\n        vocab: An instance of :class:`~texar.data.Vocab`.\n        join (bool): Whether to concat along the last dimension of the\n            the tokens into a string separated with a space character.\n        strip_pad (str): The PAD token to strip from the strings (i.e., remove\n            the leading and trailing PAD tokens of the strings). 
Default\n            is '<PAD>' as defined in\n            :class:`~texar.data.SpecialTokens`.PAD.\n            Set to `None` or `False` to disable the stripping.\n        strip_bos (str): The BOS token to strip from the strings (i.e., remove\n            the leading BOS tokens of the strings).\n            Default is '<BOS>' as defined in\n            :class:`~texar.data.SpecialTokens`.BOS.\n            Set to `None` or `False` to disable the stripping.\n        strip_eos (str): The EOS token to strip from the strings (i.e., remove\n            the EOS tokens and all subsequent tokens of the strings).\n            Default is '<EOS>' as defined in\n            :class:`~texar.data.SpecialTokens`.EOS.\n            Set to `None` or `False` to disable the stripping.\n\n    Returns:\n        If :attr:`join` is True, returns a `(n-1)`-D numpy array (or list) of\n        concatenated strings. If :attr:`join` is False, returns an `n`-D numpy\n        array (or list) of str tokens.\n\n    Example:\n\n        .. 
code-block:: python\n\n            text_ids = [[1, 9, 6, 2, 0, 0], [1, 28, 7, 8, 2, 0]]\n\n            text = map_ids_to_strs(text_ids, data.vocab)\n            # text == ['a sentence', 'parsed from ids']\n\n            text = map_ids_to_strs(\n                text_ids, data.vocab, join=False,\n                strip_pad=None, strip_bos=None, strip_eos=None)\n            # text == [['<BOS>', 'a', 'sentence', '<EOS>', '<PAD>', '<PAD>'],\n            #          ['<BOS>', 'parsed', 'from', 'ids', '<EOS>', '<PAD>']]\n    \"\"\"\n    tokens = vocab.map_ids_to_tokens_py(ids)\n    if isinstance(ids, (list, tuple)):\n        tokens = tokens.tolist()\n\n    if compat:\n        tokens = compat_as_text(tokens)\n\n    str_ = str_join(tokens, compat=False)\n\n    str_ = strip_special_tokens(\n        str_, strip_pad=strip_pad, strip_bos=strip_bos, strip_eos=strip_eos,\n        compat=False)\n\n    if join:\n        return str_\n    else:\n        return _recur_split(str_, ids)\n\ndef ceildiv(a, b):\n    \"\"\"Divides with ceil.\n\n    E.g., `5 / 2 = 2.5`, `ceildiv(5, 2) = 3`.\n\n    Args:\n        a (int): Dividend integer.\n        b (int): Divisor integer.\n\n    Returns:\n        int: Ceil quotient.\n    \"\"\"\n    return -(-a // b)\n\ndef straight_through(fw_tensor, bw_tensor):\n    \"\"\"Use a tensor in forward pass while backpropagating gradient to another.\n\n    Args:\n        fw_tensor: A tensor to be used in the forward pass.\n        bw_tensor: A tensor to which gradient is backpropagated. Must have the\n            same shape and type with :attr:`fw_tensor`.\n\n    Returns:\n        A tensor of the same shape and value with :attr:`fw_tensor` but will\n        direct gradient to bw_tensor.\n    \"\"\"\n    return tf.stop_gradient(fw_tensor) + bw_tensor - tf.stop_gradient(bw_tensor)\n"
  },
  {
    "path": "texar_repo/texar/utils/utils_io.py",
    "content": "# -*- coding: utf-8 -*-\n# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nUtility functions related to input/output.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\nfrom __future__ import unicode_literals\n\n# pylint: disable=invalid-name, redefined-builtin, too-many-arguments\n\nfrom io import open\nimport os\nimport importlib\nimport yaml\n\nimport tensorflow as tf\nfrom tensorflow import gfile\n\nas_text = tf.compat.as_text\n\n__all__ = [\n    \"load_config_single\",\n    \"load_config\",\n    \"write_paired_text\",\n    \"maybe_create_dir\",\n    \"get_files\"\n]\n\n#def get_tf_logger(fname,\n#                  verbosity=tf.logging.INFO,\n#                  to_stdio=False,\n#                  stdio_verbosity=None):\n#    \"\"\"Creates TF logger that allows to specify log filename and whether to\n#    print to stdio at the same time.\n#\n#    Args:\n#        fname (str): The log filename.\n#        verbosity: The threshold for what messages will be logged. Default is\n#            `INFO`. 
Other options include `DEBUG`, `ERROR`, `FATAL`, and `WARN`.\n#            See :tf_main:`tf.logging <logging>`.\n#        to_stdio (bool): Whether to print messages to stdio at the same time.\n#        stido_verbosity (optional): The verbosity level when printing to stdio.\n#            If `None` (default), the level is set to be the same as\n#            :attr:`verbosity`. Ignored if :attr:`to_stdio` is False.\n#\n#    Returns:\n#        The TF logger.\n#    \"\"\"\n\ndef _load_config_python(fname):\n    config = {}\n\n    config_module = importlib.import_module(fname.rstrip('.py'))\n    for key in dir(config_module):\n        if not (key.startswith('__') and key.endswith('__')):\n            config[key] = getattr(config_module, key)\n\n    return config\n\ndef _load_config_yaml(fname):\n    with gfile.GFile(fname) as config_file:\n        config = yaml.load(config_file)\n    return config\n\ndef load_config_single(fname, config=None):\n    \"\"\"Loads config from a single file.\n\n    The config file can be either a Python file (with suffix '.py')\n    or a YAML file. If the filename is not suffixed with '.py', the file is\n    parsed as YAML.\n\n    Args:\n        fname (str): The config file name.\n        config (dict, optional): A config dict to which new configurations are\n            added. 
If `None`, a new config dict is created.\n\n    Returns:\n        A `dict` of configurations.\n    \"\"\"\n    if fname.endswith('.py'):\n        new_config = _load_config_python(fname)\n    else:\n        new_config = _load_config_yaml(fname)\n\n    if config is None:\n        config = new_config\n    else:\n        for key, value in new_config.items():\n            if key in config:\n                if isinstance(config[key], dict):\n                    config[key].update(value)\n                else:\n                    config[key] = value\n            else:\n                config[key] = value\n\n    return config\n\ndef load_config(config_path, config=None):\n    \"\"\"Loads configs from (possibly multiple) file(s).\n\n    A config file can be either a Python file (with suffix '.py')\n    or a YAML file. If the filename is not suffixed with '.py', the file is\n    parsed as YAML.\n\n    Args:\n        config_path: Paths to configuration files. This can be a `list` of\n            config file names, or a path to a directory in which all files\n            are loaded, or a string of multiple file names separated by commas.\n        config (dict, optional): A config dict to which new configurations are\n            added. 
If `None`, a new config dict is created.\n\n    Returns:\n        A `dict` of configurations.\n    \"\"\"\n    fnames = []\n    if isinstance(config_path, (list, tuple)):\n        fnames = list(config_path)\n    elif gfile.IsDirectory(config_path):\n        for fname in gfile.ListDirectory(config_path):\n            fname = os.path.join(config_path, fname)\n            if not gfile.IsDirectory(fname):\n                fnames.append(fname)\n    else:\n        for fname in config_path.split(\",\"):\n            fname = fname.strip()\n            if not fname:\n                continue\n            fnames.append(fname)\n\n    if config is None:\n        config = {}\n\n    for fname in fnames:\n        config = load_config_single(fname, config)\n\n    return config\n\n# pylint: disable=too-many-locals\ndef write_paired_text(src, tgt, fname, append=False, mode='h', sep='\\t',\n                      src_fname_suffix='src', tgt_fname_suffix='tgt'):\n    \"\"\"Writes paired text to a file.\n\n    Args:\n        src: A list (or array) of `str` source text.\n        tgt: A list (or array) of `str` target text.\n        fname (str): The output filename.\n        append (bool): Whether append content to the end of the file if exists.\n        mode (str): The mode of writing, with the following options:\n\n            - **'h'**: The \"horizontal\" mode. Each source target pair is \\\n                written in one line, intervened with :attr:`sep`, e.g.::\n\n                    source_1 target_1\n                    source_2 target_2\n\n            - **'v'**: The \"vertical\" mode. Each source target pair is \\\n                written in two consecutive lines, e.g::\n\n                    source_1\n                    target_1\n                    source_2\n                    target_2\n\n            - **'s'**: The \"separate\" mode. 
Each source target pair is \\\n                    written in corresponding lines of two files named \\\n                    as `\"{fname}.{src_fname_suffix}\"` \\\n                    and `\"{fname}.{tgt_fname_suffix}\"`, respectively.\n\n        sep (str): The string intervening between source and target. Used\n            when :attr:`mode` is set to 'h'.\n        src_fname_suffix (str): Used when :attr:`mode` is 's'. The suffix to\n            the source output filename. E.g., with\n            `(fname='output', src_fname_suffix='src')`, the output source file\n            is named as `output.src`.\n        tgt_fname_suffix (str): Used when :attr:`mode` is 's'. The suffix to\n            the target output filename.\n\n    Returns:\n        The fileanme(s). If `mode` == 'h' or 'v', returns\n        :attr:`fname`. If `mode` == 's', returns a list of filenames\n        `[\"{fname}.src\", \"{fname}.tgt\"]`.\n    \"\"\"\n    fmode = 'a' if append else 'w'\n    if mode == 's':\n        fn_src = '{}.{}'.format(fname, src_fname_suffix)\n        fn_tgt = '{}.{}'.format(fname, tgt_fname_suffix)\n        with open(fn_src, fmode, encoding='utf-8') as fs:\n            fs.write(as_text('\\n'.join(src)))\n            fs.write('\\n')\n        with open(fn_tgt, fmode, encoding='utf-8') as ft:\n            ft.write(as_text('\\n'.join(tgt)))\n            ft.write('\\n')\n        return fn_src, fn_tgt\n    else:\n        with open(fname, fmode, encoding='utf-8') as f:\n            for s, t in zip(src, tgt):\n                if mode == 'h':\n                    text = '{}{}{}\\n'.format(as_text(s), sep, as_text(t))\n                    f.write(as_text(text))\n                elif mode == 'v':\n                    text = '{}\\n{}\\n'.format(as_text(s), as_text(t))\n                    f.write(as_text(text))\n                else:\n                    raise ValueError('Unknown mode: {}'.format(mode))\n        return fname\n\ndef maybe_create_dir(dirname):\n    \"\"\"Creates directory 
if doesn't exist\n    \"\"\"\n    if not tf.gfile.IsDirectory(dirname):\n        tf.gfile.MakeDirs(dirname)\n        return True\n    return False\n\n\ndef get_files(file_paths):\n    \"\"\"Gets a list of file paths given possibly a pattern :attr:`file_paths`.\n\n    Adapted from `tf.contrib.slim.data.parallel_reader.get_data_files`.\n\n    Args:\n        file_paths: A (list of) path to the files. The path can be a pattern,\n            e.g., /path/to/train*, /path/to/train[12]\n\n    Returns:\n        A list of file paths.\n\n    Raises:\n        ValueError: If no files are not found\n    \"\"\"\n    if isinstance(file_paths, (list, tuple)):\n        files = []\n        for f in file_paths:\n            files += get_files(f)\n    else:\n        if '*' in file_paths or '?' in file_paths or '[' in file_paths:\n            files = tf.gfile.Glob(file_paths)\n        else:\n            files = [file_paths]\n    if not files:\n        raise ValueError('No data files found in %s' % (file_paths,))\n    return files\n"
  },
  {
    "path": "texar_repo/texar/utils/utils_test.py",
    "content": "# -*- coding: utf-8 -*-\n#\n\"\"\"\nUnit tests for utility functions.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nimport tempfile\nimport numpy as np\n\nimport tensorflow as tf\n\nfrom texar.utils import utils\nfrom texar.data.vocabulary import Vocab\n\n\nclass UtilsTest(tf.test.TestCase):\n    \"\"\"Tests utility functions.\n    \"\"\"\n\n    def test_dict_patch(self):\n        \"\"\"Tests :meth:`texar.utils.dict_patch`.\n        \"\"\"\n        src_dict = {\n            \"k1\": \"k1\",\n            \"k_dict_1\": {\n                \"kd1_k1\": \"kd1_k1\",\n                \"kd1_k2\": \"kd1_k2\"\n            },\n            \"k_dict_2\": {\n                \"kd2_k1\": \"kd2_k1\"\n            }\n        }\n        tgt_dict = {\n            \"k1\": \"k1_tgt\",\n            \"k_dict_1\": {\n                \"kd1_k1\": \"kd1_k1\"\n            },\n            \"k_dict_2\": \"kd2_not_dict\"\n        }\n\n        patched_dict = utils.dict_patch(tgt_dict, src_dict)\n        self.assertEqual(patched_dict[\"k1\"], tgt_dict[\"k1\"])\n        self.assertEqual(patched_dict[\"k_dict_1\"], src_dict[\"k_dict_1\"])\n        self.assertEqual(patched_dict[\"k_dict_2\"], tgt_dict[\"k_dict_2\"])\n\n    def test_strip_token(self):\n        \"\"\"Tests :func:`texar.utils.strip_token`\n        \"\"\"\n        str_ = \" <PAD>  <PAD>\\t  i am <PAD> \\t <PAD>  \\t\"\n        self.assertEqual(utils.strip_token(str_, \"<PAD>\"), \"i am\")\n        self.assertEqual(utils.strip_token(str_, \"\"),\n                         \"<PAD> <PAD> i am <PAD> <PAD>\")\n        self.assertEqual(utils.strip_token([str_], \"<PAD>\"), [\"i am\"])\n        self.assertEqual(\n            utils.strip_token(np.asarray([str_]), \"<PAD>\"),\n            [\"i am\"])\n        self.assertEqual(type(utils.strip_token(np.asarray([str_]), \"<PAD>\")),\n                         
np.ndarray)\n        self.assertEqual(\n            utils.strip_token([[[str_]], ['']], \"<PAD>\"),\n            [[[\"i am\"]], ['']])\n\n        str_ = str_.split()\n        self.assertEqual(utils.strip_token(str_, \"<PAD>\", is_token_list=True),\n                         [\"i\", \"am\"])\n        self.assertEqual(utils.strip_token([str_], \"<PAD>\", is_token_list=True),\n                         [[\"i\", \"am\"]])\n\n    def test_strip_bos(self):\n        \"\"\"Tests :func:`texar.utils.strip_bos`\n        \"\"\"\n        str_ = \"<BOS> i am\"\n        self.assertEqual(utils.strip_bos(str_, \"<BOS>\"), \"i am\")\n        self.assertEqual(utils.strip_bos(str_, \"\"), \"<BOS> i am\")\n        self.assertEqual(utils.strip_bos([str_], \"<BOS>\"), [\"i am\"])\n\n        str_ = str_.split()\n        self.assertEqual(utils.strip_bos(str_, \"<BOS>\", is_token_list=True),\n                         [\"i\", \"am\"])\n        self.assertEqual(utils.strip_bos([str_], \"<BOS>\", is_token_list=True),\n                         [[\"i\", \"am\"]])\n\n    def test_strip_eos(self):\n        \"\"\"Tests :func:`texar.utils.strip_eos`\n        \"\"\"\n        str_ = \"i am <EOS>\"\n        self.assertEqual(utils.strip_eos(str_, \"<EOS>\"), \"i am\")\n        self.assertEqual(utils.strip_eos([str_], \"<EOS>\"), [\"i am\"])\n\n        str_ = str_.split()\n        self.assertEqual(utils.strip_eos(str_, \"<EOS>\", is_token_list=True),\n                         [\"i\", \"am\"])\n        self.assertEqual(utils.strip_eos([str_], \"<EOS>\", is_token_list=True),\n                         [[\"i\", \"am\"]])\n\n    def test_strip_special_tokens(self):\n        \"\"\"Test :func:`texar.utils.strip_special_tokens`\n        \"\"\"\n        str_ = \"<BOS> i am <EOS> <PAD> <PAD>\"\n        self.assertEqual(utils.strip_special_tokens(str_), \"i am\")\n        self.assertEqual(utils.strip_special_tokens([str_]), [\"i am\"])\n\n        str_ = str_.split()\n        
self.assertEqual(utils.strip_special_tokens(str_, is_token_list=True),\n                         [\"i\", \"am\"])\n        self.assertEqual(utils.strip_special_tokens([str_], is_token_list=True),\n                         [[\"i\", \"am\"]])\n\n    def test_str_join(self):\n        \"\"\"Tests :func:`texar.utils.str_join`\n        \"\"\"\n        tokens = np.ones([2, 2, 3], dtype='str')\n\n        str_ = utils.str_join(tokens)\n        np.testing.assert_array_equal(\n            str_, np.asarray([['1 1 1', '1 1 1'], ['1 1 1', '1 1 1']]))\n        self.assertIsInstance(str_, np.ndarray)\n\n        str_ = utils.str_join(tokens.tolist())\n        np.testing.assert_array_equal(\n            str_, [['1 1 1', '1 1 1'], ['1 1 1', '1 1 1']])\n        self.assertIsInstance(str_, list)\n\n        tokens = [[], ['1', '1']]\n        str_ = utils.str_join(tokens)\n        np.testing.assert_array_equal(str_, ['', '1 1'])\n\n    def test_uniquify_str(self):\n        \"\"\"Tests :func:`texar.utils.uniquify_str`.\n        \"\"\"\n        str_set = ['str']\n        unique_str = utils.uniquify_str('str', str_set)\n        self.assertEqual(unique_str, 'str_1')\n\n        str_set.append('str_1')\n        str_set.append('str_2')\n        unique_str = utils.uniquify_str('str', str_set)\n        self.assertEqual(unique_str, 'str_3')\n\n    def test_map_ids_to_strs(self):\n        \"\"\"Tests :func:`texar.utils.map_ids_to_strs`.\n        \"\"\"\n        vocab_list = ['word', '词']\n        vocab_file = tempfile.NamedTemporaryFile()\n        vocab_file.write('\\n'.join(vocab_list).encode(\"utf-8\"))\n        vocab_file.flush()\n        vocab = Vocab(vocab_file.name)\n\n        text = [['<BOS>', 'word', '词', '<EOS>', '<PAD>'],\n                ['word', '词', 'word', '词', '<PAD>']]\n        text = np.asarray(text)\n        ids = vocab.map_tokens_to_ids_py(text)\n\n        ids = ids.tolist()\n        text_ = utils.map_ids_to_strs(ids, vocab)\n\n        self.assertEqual(text_[0], 'word 词')\n       
 self.assertEqual(text_[1], 'word 词 word 词')\n\nif __name__ == \"__main__\":\n    tf.test.main()\n\n"
  },
  {
    "path": "texar_repo/texar/utils/variables.py",
    "content": "# Copyright 2018 The Texar Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nUtility functions related to variables.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\n# pylint: disable=invalid-name\n\nimport tensorflow as tf\n\n__all__ = [\n    \"get_unique_named_variable_scope\",\n    \"add_variable\",\n    \"collect_trainable_variables\"\n]\n\n\ndef get_unique_named_variable_scope(base_name):\n    \"\"\"Returns a variable scope with a unique name.\n\n    Args:\n        base_name (str): The base name to uniquified.\n\n    Returns:\n        An instance of :tf_main:`variable_scope <variable_scope>`.\n\n    Example:\n\n        .. 
code-block:: python\n\n            vs = get_unique_named_variable_scope('base_name')\n            with tf.variable_scope(vs):\n                ....\n    \"\"\"\n    with tf.variable_scope(None, default_name=base_name) as vs:\n        return vs\n\ndef add_variable(variable, var_list):\n    \"\"\"Adds variable to a given list.\n\n    Args:\n        variable: A (list of) variable(s).\n        var_list (list): The list where the :attr:`variable` are added to.\n    \"\"\"\n    if isinstance(variable, (list, tuple)):\n        for var in variable:\n            add_variable(var, var_list)\n    else:\n        if variable not in var_list:\n            var_list.append(variable)\n\ndef collect_trainable_variables(modules):\n    \"\"\"Collects all trainable variables of modules.\n\n    Trainable variables included in multiple modules occur only once in the\n    returned list.\n\n    Args:\n        modules: A (list of) instance of the subclasses of\n            :class:`~texar.modules.ModuleBase`.\n\n    Returns:\n        A list of trainable variables in the modules.\n    \"\"\"\n    if not isinstance(modules, (list, tuple)):\n        modules = [modules]\n\n    var_list = []\n    for mod in modules:\n        add_variable(mod.trainable_variables, var_list)\n\n    return var_list\n"
  },
  {
    "path": "uncased_L-12_H-768_A-12/bert_config.json",
    "content": "{\n  \"attention_probs_dropout_prob\": 0.1,\n  \"hidden_act\": \"gelu\",\n  \"hidden_dropout_prob\": 0.1,\n  \"hidden_size\": 768,\n  \"initializer_range\": 0.02,\n  \"intermediate_size\": 3072,\n  \"max_position_embeddings\": 512,\n  \"num_attention_heads\": 12,\n  \"num_hidden_layers\": 12,\n  \"type_vocab_size\": 2,\n  \"vocab_size\": 30522\n}\n"
  },
  {
    "path": "uncased_L-12_H-768_A-12/vocab.txt",
    "content": "[PAD]\n[unused0]\n[unused1]\n[unused2]\n[unused3]\n[unused4]\n[unused5]\n[unused6]\n[unused7]\n[unused8]\n[unused9]\n[unused10]\n[unused11]\n[unused12]\n[unused13]\n[unused14]\n[unused15]\n[unused16]\n[unused17]\n[unused18]\n[unused19]\n[unused20]\n[unused21]\n[unused22]\n[unused23]\n[unused24]\n[unused25]\n[unused26]\n[unused27]\n[unused28]\n[unused29]\n[unused30]\n[unused31]\n[unused32]\n[unused33]\n[unused34]\n[unused35]\n[unused36]\n[unused37]\n[unused38]\n[unused39]\n[unused40]\n[unused41]\n[unused42]\n[unused43]\n[unused44]\n[unused45]\n[unused46]\n[unused47]\n[unused48]\n[unused49]\n[unused50]\n[unused51]\n[unused52]\n[unused53]\n[unused54]\n[unused55]\n[unused56]\n[unused57]\n[unused58]\n[unused59]\n[unused60]\n[unused61]\n[unused62]\n[unused63]\n[unused64]\n[unused65]\n[unused66]\n[unused67]\n[unused68]\n[unused69]\n[unused70]\n[unused71]\n[unused72]\n[unused73]\n[unused74]\n[unused75]\n[unused76]\n[unused77]\n[unused78]\n[unused79]\n[unused80]\n[unused81]\n[unused82]\n[unused83]\n[unused84]\n[unused85]\n[unused86]\n[unused87]\n[unused88]\n[unused89]\n[unused90]\n[unused91]\n[unused92]\n[unused93]\n[unused94]\n[unused95]\n[unused96]\n[unused97]\n[unused98]\n[UNK]\n[CLS]\n[SEP]\n[MASK]\n[unused99]\n[unused100]\n[unused101]\n[unused102]\n[unused103]\n[unused104]\n[unused105]\n[unused106]\n[unused107]\n[unused108]\n[unused109]\n[unused110]\n[unused111]\n[unused112]\n[unused113]\n[unused114]\n[unused115]\n[unused116]\n[unused117]\n[unused118]\n[unused119]\n[unused120]\n[unused121]\n[unused122]\n[unused123]\n[unused124]\n[unused125]\n[unused126]\n[unused127]\n[unused128]\n[unused129]\n[unused130]\n[unused131]\n[unused132]\n[unused133]\n[unused134]\n[unused135]\n[unused136]\n[unused137]\n[unused138]\n[unused139]\n[unused140]\n[unused141]\n[unused142]\n[unused143]\n[unused144]\n[unused145]\n[unused146]\n[unused147]\n[unused148]\n[unused149]\n[unused150]\n[unused151]\n[unused152]\n[unused153]\n[unused154]\n[unused155]\n[unused156]\n[unused157]\n[unu
sed158]\n[unused159]\n[unused160]\n[unused161]\n[unused162]\n[unused163]\n[unused164]\n[unused165]\n[unused166]\n[unused167]\n[unused168]\n[unused169]\n[unused170]\n[unused171]\n[unused172]\n[unused173]\n[unused174]\n[unused175]\n[unused176]\n[unused177]\n[unused178]\n[unused179]\n[unused180]\n[unused181]\n[unused182]\n[unused183]\n[unused184]\n[unused185]\n[unused186]\n[unused187]\n[unused188]\n[unused189]\n[unused190]\n[unused191]\n[unused192]\n[unused193]\n[unused194]\n[unused195]\n[unused196]\n[unused197]\n[unused198]\n[unused199]\n[unused200]\n[unused201]\n[unused202]\n[unused203]\n[unused204]\n[unused205]\n[unused206]\n[unused207]\n[unused208]\n[unused209]\n[unused210]\n[unused211]\n[unused212]\n[unused213]\n[unused214]\n[unused215]\n[unused216]\n[unused217]\n[unused218]\n[unused219]\n[unused220]\n[unused221]\n[unused222]\n[unused223]\n[unused224]\n[unused225]\n[unused226]\n[unused227]\n[unused228]\n[unused229]\n[unused230]\n[unused231]\n[unused232]\n[unused233]\n[unused234]\n[unused235]\n[unused236]\n[unused237]\n[unused238]\n[unused239]\n[unused240]\n[unused241]\n[unused242]\n[unused243]\n[unused244]\n[unused245]\n[unused246]\n[unused247]\n[unused248]\n[unused249]\n[unused250]\n[unused251]\n[unused252]\n[unused253]\n[unused254]\n[unused255]\n[unused256]\n[unused257]\n[unused258]\n[unused259]\n[unused260]\n[unused261]\n[unused262]\n[unused263]\n[unused264]\n[unused265]\n[unused266]\n[unused267]\n[unused268]\n[unused269]\n[unused270]\n[unused271]\n[unused272]\n[unused273]\n[unused274]\n[unused275]\n[unused276]\n[unused277]\n[unused278]\n[unused279]\n[unused280]\n[unused281]\n[unused282]\n[unused283]\n[unused284]\n[unused285]\n[unused286]\n[unused287]\n[unused288]\n[unused289]\n[unused290]\n[unused291]\n[unused292]\n[unused293]\n[unused294]\n[unused295]\n[unused296]\n[unused297]\n[unused298]\n[unused299]\n[unused300]\n[unused301]\n[unused302]\n[unused303]\n[unused304]\n[unused305]\n[unused306]\n[unused307]\n[unused308]\n[unused309]\n[unused310]\n[unused311]\n[u
nused312]\n[unused313]\n[unused314]\n[unused315]\n[unused316]\n[unused317]\n[unused318]\n[unused319]\n[unused320]\n[unused321]\n[unused322]\n[unused323]\n[unused324]\n[unused325]\n[unused326]\n[unused327]\n[unused328]\n[unused329]\n[unused330]\n[unused331]\n[unused332]\n[unused333]\n[unused334]\n[unused335]\n[unused336]\n[unused337]\n[unused338]\n[unused339]\n[unused340]\n[unused341]\n[unused342]\n[unused343]\n[unused344]\n[unused345]\n[unused346]\n[unused347]\n[unused348]\n[unused349]\n[unused350]\n[unused351]\n[unused352]\n[unused353]\n[unused354]\n[unused355]\n[unused356]\n[unused357]\n[unused358]\n[unused359]\n[unused360]\n[unused361]\n[unused362]\n[unused363]\n[unused364]\n[unused365]\n[unused366]\n[unused367]\n[unused368]\n[unused369]\n[unused370]\n[unused371]\n[unused372]\n[unused373]\n[unused374]\n[unused375]\n[unused376]\n[unused377]\n[unused378]\n[unused379]\n[unused380]\n[unused381]\n[unused382]\n[unused383]\n[unused384]\n[unused385]\n[unused386]\n[unused387]\n[unused388]\n[unused389]\n[unused390]\n[unused391]\n[unused392]\n[unused393]\n[unused394]\n[unused395]\n[unused396]\n[unused397]\n[unused398]\n[unused399]\n[unused400]\n[unused401]\n[unused402]\n[unused403]\n[unused404]\n[unused405]\n[unused406]\n[unused407]\n[unused408]\n[unused409]\n[unused410]\n[unused411]\n[unused412]\n[unused413]\n[unused414]\n[unused415]\n[unused416]\n[unused417]\n[unused418]\n[unused419]\n[unused420]\n[unused421]\n[unused422]\n[unused423]\n[unused424]\n[unused425]\n[unused426]\n[unused427]\n[unused428]\n[unused429]\n[unused430]\n[unused431]\n[unused432]\n[unused433]\n[unused434]\n[unused435]\n[unused436]\n[unused437]\n[unused438]\n[unused439]\n[unused440]\n[unused441]\n[unused442]\n[unused443]\n[unused444]\n[unused445]\n[unused446]\n[unused447]\n[unused448]\n[unused449]\n[unused450]\n[unused451]\n[unused452]\n[unused453]\n[unused454]\n[unused455]\n[unused456]\n[unused457]\n[unused458]\n[unused459]\n[unused460]\n[unused461]\n[unused462]\n[unused463]\n[unused464]\n[unused465]\n
[unused466]\n[unused467]\n[unused468]\n[unused469]\n[unused470]\n[unused471]\n[unused472]\n[unused473]\n[unused474]\n[unused475]\n[unused476]\n[unused477]\n[unused478]\n[unused479]\n[unused480]\n[unused481]\n[unused482]\n[unused483]\n[unused484]\n[unused485]\n[unused486]\n[unused487]\n[unused488]\n[unused489]\n[unused490]\n[unused491]\n[unused492]\n[unused493]\n[unused494]\n[unused495]\n[unused496]\n[unused497]\n[unused498]\n[unused499]\n[unused500]\n[unused501]\n[unused502]\n[unused503]\n[unused504]\n[unused505]\n[unused506]\n[unused507]\n[unused508]\n[unused509]\n[unused510]\n[unused511]\n[unused512]\n[unused513]\n[unused514]\n[unused515]\n[unused516]\n[unused517]\n[unused518]\n[unused519]\n[unused520]\n[unused521]\n[unused522]\n[unused523]\n[unused524]\n[unused525]\n[unused526]\n[unused527]\n[unused528]\n[unused529]\n[unused530]\n[unused531]\n[unused532]\n[unused533]\n[unused534]\n[unused535]\n[unused536]\n[unused537]\n[unused538]\n[unused539]\n[unused540]\n[unused541]\n[unused542]\n[unused543]\n[unused544]\n[unused545]\n[unused546]\n[unused547]\n[unused548]\n[unused549]\n[unused550]\n[unused551]\n[unused552]\n[unused553]\n[unused554]\n[unused555]\n[unused556]\n[unused557]\n[unused558]\n[unused559]\n[unused560]\n[unused561]\n[unused562]\n[unused563]\n[unused564]\n[unused565]\n[unused566]\n[unused567]\n[unused568]\n[unused569]\n[unused570]\n[unused571]\n[unused572]\n[unused573]\n[unused574]\n[unused575]\n[unused576]\n[unused577]\n[unused578]\n[unused579]\n[unused580]\n[unused581]\n[unused582]\n[unused583]\n[unused584]\n[unused585]\n[unused586]\n[unused587]\n[unused588]\n[unused589]\n[unused590]\n[unused591]\n[unused592]\n[unused593]\n[unused594]\n[unused595]\n[unused596]\n[unused597]\n[unused598]\n[unused599]\n[unused600]\n[unused601]\n[unused602]\n[unused603]\n[unused604]\n[unused605]\n[unused606]\n[unused607]\n[unused608]\n[unused609]\n[unused610]\n[unused611]\n[unused612]\n[unused613]\n[unused614]\n[unused615]\n[unused616]\n[unused617]\n[unused618]\n[unused619]
\n[unused620]\n[unused621]\n[unused622]\n[unused623]\n[unused624]\n[unused625]\n[unused626]\n[unused627]\n[unused628]\n[unused629]\n[unused630]\n[unused631]\n[unused632]\n[unused633]\n[unused634]\n[unused635]\n[unused636]\n[unused637]\n[unused638]\n[unused639]\n[unused640]\n[unused641]\n[unused642]\n[unused643]\n[unused644]\n[unused645]\n[unused646]\n[unused647]\n[unused648]\n[unused649]\n[unused650]\n[unused651]\n[unused652]\n[unused653]\n[unused654]\n[unused655]\n[unused656]\n[unused657]\n[unused658]\n[unused659]\n[unused660]\n[unused661]\n[unused662]\n[unused663]\n[unused664]\n[unused665]\n[unused666]\n[unused667]\n[unused668]\n[unused669]\n[unused670]\n[unused671]\n[unused672]\n[unused673]\n[unused674]\n[unused675]\n[unused676]\n[unused677]\n[unused678]\n[unused679]\n[unused680]\n[unused681]\n[unused682]\n[unused683]\n[unused684]\n[unused685]\n[unused686]\n[unused687]\n[unused688]\n[unused689]\n[unused690]\n[unused691]\n[unused692]\n[unused693]\n[unused694]\n[unused695]\n[unused696]\n[unused697]\n[unused698]\n[unused699]\n[unused700]\n[unused701]\n[unused702]\n[unused703]\n[unused704]\n[unused705]\n[unused706]\n[unused707]\n[unused708]\n[unused709]\n[unused710]\n[unused711]\n[unused712]\n[unused713]\n[unused714]\n[unused715]\n[unused716]\n[unused717]\n[unused718]\n[unused719]\n[unused720]\n[unused721]\n[unused722]\n[unused723]\n[unused724]\n[unused725]\n[unused726]\n[unused727]\n[unused728]\n[unused729]\n[unused730]\n[unused731]\n[unused732]\n[unused733]\n[unused734]\n[unused735]\n[unused736]\n[unused737]\n[unused738]\n[unused739]\n[unused740]\n[unused741]\n[unused742]\n[unused743]\n[unused744]\n[unused745]\n[unused746]\n[unused747]\n[unused748]\n[unused749]\n[unused750]\n[unused751]\n[unused752]\n[unused753]\n[unused754]\n[unused755]\n[unused756]\n[unused757]\n[unused758]\n[unused759]\n[unused760]\n[unused761]\n[unused762]\n[unused763]\n[unused764]\n[unused765]\n[unused766]\n[unused767]\n[unused768]\n[unused769]\n[unused770]\n[unused771]\n[unused772]\n[unused77
3]\n[unused774]\n[unused775]\n[unused776]\n[unused777]\n[unused778]\n[unused779]\n[unused780]\n[unused781]\n[unused782]\n[unused783]\n[unused784]\n[unused785]\n[unused786]\n[unused787]\n[unused788]\n[unused789]\n[unused790]\n[unused791]\n[unused792]\n[unused793]\n[unused794]\n[unused795]\n[unused796]\n[unused797]\n[unused798]\n[unused799]\n[unused800]\n[unused801]\n[unused802]\n[unused803]\n[unused804]\n[unused805]\n[unused806]\n[unused807]\n[unused808]\n[unused809]\n[unused810]\n[unused811]\n[unused812]\n[unused813]\n[unused814]\n[unused815]\n[unused816]\n[unused817]\n[unused818]\n[unused819]\n[unused820]\n[unused821]\n[unused822]\n[unused823]\n[unused824]\n[unused825]\n[unused826]\n[unused827]\n[unused828]\n[unused829]\n[unused830]\n[unused831]\n[unused832]\n[unused833]\n[unused834]\n[unused835]\n[unused836]\n[unused837]\n[unused838]\n[unused839]\n[unused840]\n[unused841]\n[unused842]\n[unused843]\n[unused844]\n[unused845]\n[unused846]\n[unused847]\n[unused848]\n[unused849]\n[unused850]\n[unused851]\n[unused852]\n[unused853]\n[unused854]\n[unused855]\n[unused856]\n[unused857]\n[unused858]\n[unused859]\n[unused860]\n[unused861]\n[unused862]\n[unused863]\n[unused864]\n[unused865]\n[unused866]\n[unused867]\n[unused868]\n[unused869]\n[unused870]\n[unused871]\n[unused872]\n[unused873]\n[unused874]\n[unused875]\n[unused876]\n[unused877]\n[unused878]\n[unused879]\n[unused880]\n[unused881]\n[unused882]\n[unused883]\n[unused884]\n[unused885]\n[unused886]\n[unused887]\n[unused888]\n[unused889]\n[unused890]\n[unused891]\n[unused892]\n[unused893]\n[unused894]\n[unused895]\n[unused896]\n[unused897]\n[unused898]\n[unused899]\n[unused900]\n[unused901]\n[unused902]\n[unused903]\n[unused904]\n[unused905]\n[unused906]\n[unused907]\n[unused908]\n[unused909]\n[unused910]\n[unused911]\n[unused912]\n[unused913]\n[unused914]\n[unused915]\n[unused916]\n[unused917]\n[unused918]\n[unused919]\n[unused920]\n[unused921]\n[unused922]\n[unused923]\n[unused924]\n[unused925]\n[unused926]\n[unused
927]\n[unused928]\n[unused929]\n[unused930]\n[unused931]\n[unused932]\n[unused933]\n[unused934]\n[unused935]\n[unused936]\n[unused937]\n[unused938]\n[unused939]\n[unused940]\n[unused941]\n[unused942]\n[unused943]\n[unused944]\n[unused945]\n[unused946]\n[unused947]\n[unused948]\n[unused949]\n[unused950]\n[unused951]\n[unused952]\n[unused953]\n[unused954]\n[unused955]\n[unused956]\n[unused957]\n[unused958]\n[unused959]\n[unused960]\n[unused961]\n[unused962]\n[unused963]\n[unused964]\n[unused965]\n[unused966]\n[unused967]\n[unused968]\n[unused969]\n[unused970]\n[unused971]\n[unused972]\n[unused973]\n[unused974]\n[unused975]\n[unused976]\n[unused977]\n[unused978]\n[unused979]\n[unused980]\n[unused981]\n[unused982]\n[unused983]\n[unused984]\n[unused985]\n[unused986]\n[unused987]\n[unused988]\n[unused989]\n[unused990]\n[unused991]\n[unused992]\n[unused993]\n!\n\"\n#\n$\n%\n&\n'\n(\n)\n*\n+\n,\n-\n.\n/\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n:\n;\n<\n=\n>\n?\n@\n[\n\\\n]\n^\n_\n`\na\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\nz\n{\n|\n}\n~\n¡\n¢\n£\n¤\n¥\n¦\n§\n¨\n©\nª\n«\n¬\n®\n°\n±\n²\n³\n´\nµ\n¶\n·\n¹\nº\n»\n¼\n½\n¾\n¿\n×\nß\næ\nð\n÷\nø\nþ\nđ\nħ\nı\nł\nŋ\nœ\nƒ\nɐ\nɑ\nɒ\nɔ\nɕ\nə\nɛ\nɡ\nɣ\nɨ\nɪ\nɫ\nɬ\nɯ\nɲ\nɴ\nɹ\nɾ\nʀ\nʁ\nʂ\nʃ\nʉ\nʊ\nʋ\nʌ\nʎ\nʐ\nʑ\nʒ\nʔ\nʰ\nʲ\nʳ\nʷ\nʸ\nʻ\nʼ\nʾ\nʿ\nˈ\nː\nˡ\nˢ\nˣ\nˤ\nα\nβ\nγ\nδ\nε\nζ\nη\nθ\nι\nκ\nλ\nμ\nν\nξ\nο\nπ\nρ\nς\nσ\nτ\nυ\nφ\nχ\nψ\nω\nа\nб\nв\nг\nд\nе\nж\nз\nи\nк\nл\nм\nн\nо\nп\nр\nс\nт\nу\nф\nх\nц\nч\nш\nщ\nъ\nы\nь\nэ\nю\nя\nђ\nє\nі\nј\nљ\nњ\nћ\nӏ\nա\nբ\nգ\nդ\nե\nթ\nի\nլ\nկ\nհ\nմ\nյ\nն\nո\nպ\nս\nվ\nտ\nր\nւ\nք\n־\nא\nב\nג\nד\nה\nו\nז\nח\nט\nי\nך\nכ\nל\nם\nמ\nן\nנ\nס\nע\nף\nפ\nץ\nצ\nק\nר\nש\nת\n،\nء\nا\nب\nة\nت\nث\nج\nح\nخ\nد\nذ\nر\nز\nس\nش\nص\nض\nط\nظ\nع\nغ\nـ\nف\nق\nك\nل\nم\nن\nه\nو\nى\nي\nٹ\nپ\nچ\nک\nگ\nں\nھ\nہ\nی\nے\nअ\nआ\nउ\nए\nक\nख\nग\nच\nज\nट\nड\nण\nत\nथ\nद\nध\nन\nप\nब\nभ\nम\nय\nर\nल\nव\nश\nष\nस\nह\nा\nि\nी\nो\n।\n॥\nং\nঅ\nআ\nই\nউ\nএ\nও\nক\nখ\nগ\nচ\nছ\nজ\nট\nড\nণ\nত\nথ\nদ\nধ\nন\nপ\nব\nভ\nম\nয\nর\nল\nশ\nষ\nস\n
হ\nা\nি\nী\nে\nக\nச\nட\nத\nந\nன\nப\nம\nய\nர\nல\nள\nவ\nா\nி\nு\nே\nை\nನ\nರ\nಾ\nක\nය\nර\nල\nව\nා\nก\nง\nต\nท\nน\nพ\nม\nย\nร\nล\nว\nส\nอ\nา\nเ\n་\n།\nག\nང\nད\nན\nཔ\nབ\nམ\nའ\nར\nལ\nས\nမ\nა\nბ\nგ\nდ\nე\nვ\nთ\nი\nკ\nლ\nმ\nნ\nო\nრ\nს\nტ\nუ\nᄀ\nᄂ\nᄃ\nᄅ\nᄆ\nᄇ\nᄉ\nᄊ\nᄋ\nᄌ\nᄎ\nᄏ\nᄐ\nᄑ\nᄒ\nᅡ\nᅢ\nᅥ\nᅦ\nᅧ\nᅩ\nᅪ\nᅭ\nᅮ\nᅯ\nᅲ\nᅳ\nᅴ\nᅵ\nᆨ\nᆫ\nᆯ\nᆷ\nᆸ\nᆼ\nᴬ\nᴮ\nᴰ\nᴵ\nᴺ\nᵀ\nᵃ\nᵇ\nᵈ\nᵉ\nᵍ\nᵏ\nᵐ\nᵒ\nᵖ\nᵗ\nᵘ\nᵢ\nᵣ\nᵤ\nᵥ\nᶜ\nᶠ\n‐\n‑\n‒\n–\n—\n―\n‖\n‘\n’\n‚\n“\n”\n„\n†\n‡\n•\n…\n‰\n′\n″\n›\n‿\n⁄\n⁰\nⁱ\n⁴\n⁵\n⁶\n⁷\n⁸\n⁹\n⁺\n⁻\nⁿ\n₀\n₁\n₂\n₃\n₄\n₅\n₆\n₇\n₈\n₉\n₊\n₍\n₎\nₐ\nₑ\nₒ\nₓ\nₕ\nₖ\nₗ\nₘ\nₙ\nₚ\nₛ\nₜ\n₤\n₩\n€\n₱\n₹\nℓ\n№\nℝ\n™\n⅓\n⅔\n←\n↑\n→\n↓\n↔\n↦\n⇄\n⇌\n⇒\n∂\n∅\n∆\n∇\n∈\n−\n∗\n∘\n√\n∞\n∧\n∨\n∩\n∪\n≈\n≡\n≤\n≥\n⊂\n⊆\n⊕\n⊗\n⋅\n─\n│\n■\n▪\n●\n★\n☆\n☉\n♠\n♣\n♥\n♦\n♭\n♯\n⟨\n⟩\nⱼ\n⺩\n⺼\n⽥\n、\n。\n〈\n〉\n《\n》\n「\n」\n『\n』\n〜\nあ\nい\nう\nえ\nお\nか\nき\nく\nけ\nこ\nさ\nし\nす\nせ\nそ\nた\nち\nっ\nつ\nて\nと\nな\nに\nぬ\nね\nの\nは\nひ\nふ\nへ\nほ\nま\nみ\nむ\nめ\nも\nや\nゆ\nよ\nら\nり\nる\nれ\nろ\nを\nん\nァ\nア\nィ\nイ\nウ\nェ\nエ\nオ\nカ\nキ\nク\nケ\nコ\nサ\nシ\nス\nセ\nタ\nチ\nッ\nツ\nテ\nト\nナ\nニ\nノ\nハ\nヒ\nフ\nヘ\nホ\nマ\nミ\nム\nメ\nモ\nャ\nュ\nョ\nラ\nリ\nル\nレ\nロ\nワ\nン\n・\nー\n一\n三\n上\n下\n不\n世\n中\n主\n久\n之\n也\n事\n二\n五\n井\n京\n人\n亻\n仁\n介\n代\n仮\n伊\n会\n佐\n侍\n保\n信\n健\n元\n光\n八\n公\n内\n出\n分\n前\n劉\n力\n加\n勝\n北\n区\n十\n千\n南\n博\n原\n口\n古\n史\n司\n合\n吉\n同\n名\n和\n囗\n四\n国\n國\n土\n地\n坂\n城\n堂\n場\n士\n夏\n外\n大\n天\n太\n夫\n奈\n女\n子\n学\n宀\n宇\n安\n宗\n定\n宣\n宮\n家\n宿\n寺\n將\n小\n尚\n山\n岡\n島\n崎\n川\n州\n巿\n帝\n平\n年\n幸\n广\n弘\n張\n彳\n後\n御\n德\n心\n忄\n志\n忠\n愛\n成\n我\n戦\n戸\n手\n扌\n政\n文\n新\n方\n日\n明\n星\n春\n昭\n智\n曲\n書\n月\n有\n朝\n木\n本\n李\n村\n東\n松\n林\n森\n楊\n樹\n橋\n歌\n止\n正\n武\n比\n氏\n民\n水\n氵\n氷\n永\n江\n沢\n河\n治\n法\n海\n清\n漢\n瀬\n火\n版\n犬\n王\n生\n田\n男\n疒\n発\n白\n的\n皇\n目\n相\n省\n真\n石\n示\n社\n神\n福\n禾\n秀\n秋\n空\n立\n章\n竹\n糹\n美\n義\n耳\n良\n艹\n花\n英\n華\n葉\n藤\n行\n街\n西\n見\n訁\n語\n谷\n貝\n貴\n車\n軍\n辶\n道\n郎\n郡\n部\n都\n里\n野\n金\n鈴\n镇\n長\n門\n間\n阝\n阿\n陳\n陽\n雄\n青\n面\n風\n食\n香\n馬\n高\n龍\n龸\nﬁ\nﬂ\n！\n（\n）\n，\n－\n．\n／\n：\n？\n～\nthe\nof\nand\nin\nto\nwas\nhe\nis\nas\nfor\non\nwith\nthat\nit\nhis\nby\nat\nfrom\nher\n##s\nshe\nyou\nhad\nan\nwere\nbut\nbe\nthis\nare\nnot\n
my\nthey\none\nwhich\nor\nhave\nhim\nme\nfirst\nall\nalso\ntheir\nhas\nup\nwho\nout\nbeen\nwhen\nafter\nthere\ninto\nnew\ntwo\nits\n##a\ntime\nwould\nno\nwhat\nabout\nsaid\nwe\nover\nthen\nother\nso\nmore\n##e\ncan\nif\nlike\nback\nthem\nonly\nsome\ncould\n##i\nwhere\njust\n##ing\nduring\nbefore\n##n\ndo\n##o\nmade\nschool\nthrough\nthan\nnow\nyears\nmost\nworld\nmay\nbetween\ndown\nwell\nthree\n##d\nyear\nwhile\nwill\n##ed\n##r\n##y\nlater\n##t\ncity\nunder\naround\ndid\nsuch\nbeing\nused\nstate\npeople\npart\nknow\nagainst\nyour\nmany\nsecond\nuniversity\nboth\nnational\n##er\nthese\ndon\nknown\noff\nway\nuntil\nre\nhow\neven\nget\nhead\n...\ndidn\n##ly\nteam\namerican\nbecause\nde\n##l\nborn\nunited\nfilm\nsince\nstill\nlong\nwork\nsouth\nus\nbecame\nany\nhigh\nagain\nday\nfamily\nsee\nright\nman\neyes\nhouse\nseason\nwar\nstates\nincluding\ntook\nlife\nnorth\nsame\neach\ncalled\nname\nmuch\nplace\nhowever\ngo\nfour\ngroup\nanother\nfound\nwon\narea\nhere\ngoing\n10\naway\nseries\nleft\nhome\nmusic\nbest\nmake\nhand\nnumber\ncompany\nseveral\nnever\nlast\njohn\n000\nvery\nalbum\ntake\nend\ngood\ntoo\nfollowing\nreleased\ngame\nplayed\nlittle\nbegan\ndistrict\n##m\nold\nwant\nthose\nside\nheld\nown\nearly\ncounty\nll\nleague\nuse\nwest\n##u\nface\nthink\n##es\n2010\ngovernment\n##h\nmarch\ncame\nsmall\ngeneral\ntown\njune\n##on\nline\nbased\nsomething\n##k\nseptember\nthought\nlooked\nalong\ninternational\n2011\nair\njuly\nclub\nwent\njanuary\noctober\nour\naugust\napril\nyork\n12\nfew\n2012\n2008\neast\nshow\nmember\ncollege\n2009\nfather\npublic\n##us\ncome\nmen\nfive\nset\nstation\nchurch\n##c\nnext\nformer\nnovember\nroom\nparty\nlocated\ndecember\n2013\nage\ngot\n2007\n##g\nsystem\nlet\nlove\n2006\nthough\nevery\n2014\nlook\nsong\nwater\ncentury\nwithout\nbody\nblack\nnight\nwithin\ngreat\nwomen\nsingle\nve\nbuilding\nlarge\npopulation\nriver\nnamed\nband\nwhite\nstarted\n##an\nonce\n15\n20\nshould\n18\n2015\nservice\ntop\nbuilt\nbritish\nopen\ndeath\nking\nm
oved\nlocal\ntimes\nchildren\nfebruary\nbook\nwhy\n11\ndoor\nneed\npresident\norder\nfinal\nroad\nwasn\nalthough\ndue\nmajor\ndied\nvillage\nthird\nknew\n2016\nasked\nturned\nst\nwanted\nsay\n##p\ntogether\nreceived\nmain\nson\nserved\ndifferent\n##en\nbehind\nhimself\nfelt\nmembers\npower\nfootball\nlaw\nvoice\nplay\n##in\nnear\npark\nhistory\n30\nhaving\n2005\n16\n##man\nsaw\nmother\n##al\narmy\npoint\nfront\nhelp\nenglish\nstreet\nart\nlate\nhands\ngames\naward\n##ia\nyoung\n14\nput\npublished\ncountry\ndivision\nacross\ntold\n13\noften\never\nfrench\nlondon\ncenter\nsix\nred\n2017\nled\ndays\ninclude\nlight\n25\nfind\ntell\namong\nspecies\nreally\naccording\ncentral\nhalf\n2004\nform\noriginal\ngave\noffice\nmaking\nenough\nlost\nfull\nopened\nmust\nincluded\nlive\ngiven\ngerman\nplayer\nrun\nbusiness\nwoman\ncommunity\ncup\nmight\nmillion\nland\n2000\ncourt\ndevelopment\n17\nshort\nround\nii\nkm\nseen\nclass\nstory\nalways\nbecome\nsure\nresearch\nalmost\ndirector\ncouncil\nla\n##2\ncareer\nthings\nusing\nisland\n##z\ncouldn\ncar\n##is\n24\nclose\nforce\n##1\nbetter\nfree\nsupport\ncontrol\nfield\nstudents\n2003\neducation\nmarried\n##b\nnothing\nworked\nothers\nrecord\nbig\ninside\nlevel\nanything\ncontinued\ngive\njames\n##3\nmilitary\nestablished\nnon\nreturned\nfeel\ndoes\ntitle\nwritten\nthing\nfeet\nwilliam\nfar\nco\nassociation\nhard\nalready\n2002\n##ra\nchampionship\nhuman\nwestern\n100\n##na\ndepartment\nhall\nrole\nvarious\nproduction\n21\n19\nheart\n2001\nliving\nfire\nversion\n##ers\n##f\ntelevision\nroyal\n##4\nproduced\nworking\nact\ncase\nsociety\nregion\npresent\nradio\nperiod\nlooking\nleast\ntotal\nkeep\nengland\nwife\nprogram\nper\nbrother\nmind\nspecial\n22\n##le\nam\nworks\nsoon\n##6\npolitical\ngeorge\nservices\ntaken\ncreated\n##7\nfurther\nable\nreached\ndavid\nunion\njoined\nupon\ndone\nimportant\nsocial\ninformation\neither\n##ic\n##x\nappeared\nposition\nground\nlead\nrock\ndark\nelection\n23\nboard\nfrance\nhair\ncourse\narms\nsite\
npolice\ngirl\ninstead\nreal\nsound\n##v\nwords\nmoment\n##te\nsomeone\n##8\nsummer\nproject\nannounced\nsan\nless\nwrote\npast\nfollowed\n##5\nblue\nfounded\nal\nfinally\nindia\ntaking\nrecords\namerica\n##ne\n1999\ndesign\nconsidered\nnorthern\ngod\nstop\nbattle\ntoward\neuropean\noutside\ndescribed\ntrack\ntoday\nplaying\nlanguage\n28\ncall\n26\nheard\nprofessional\nlow\naustralia\nmiles\ncalifornia\nwin\nyet\ngreen\n##ie\ntrying\nblood\n##ton\nsouthern\nscience\nmaybe\neverything\nmatch\nsquare\n27\nmouth\nvideo\nrace\nrecorded\nleave\nabove\n##9\ndaughter\npoints\nspace\n1998\nmuseum\nchange\nmiddle\ncommon\n##0\nmove\ntv\npost\n##ta\nlake\nseven\ntried\nelected\nclosed\nten\npaul\nminister\n##th\nmonths\nstart\nchief\nreturn\ncanada\nperson\nsea\nrelease\nsimilar\nmodern\nbrought\nrest\nhit\nformed\nmr\n##la\n1997\nfloor\nevent\ndoing\nthomas\n1996\nrobert\ncare\nkilled\ntraining\nstar\nweek\nneeded\nturn\nfinished\nrailway\nrather\nnews\nhealth\nsent\nexample\nran\nterm\nmichael\ncoming\ncurrently\nyes\nforces\ndespite\ngold\nareas\n50\nstage\nfact\n29\ndead\nsays\npopular\n2018\noriginally\ngermany\nprobably\ndeveloped\nresult\npulled\nfriend\nstood\nmoney\nrunning\nmi\nsigned\nword\nsongs\nchild\neventually\nmet\ntour\naverage\nteams\nminutes\nfestival\ncurrent\ndeep\nkind\n1995\ndecided\nusually\neastern\nseemed\n##ness\nepisode\nbed\nadded\ntable\nindian\nprivate\ncharles\nroute\navailable\nidea\nthroughout\ncentre\naddition\nappointed\nstyle\n1994\nbooks\neight\nconstruction\npress\nmean\nwall\nfriends\nremained\nschools\nstudy\n##ch\n##um\ninstitute\noh\nchinese\nsometimes\nevents\npossible\n1992\naustralian\ntype\nbrown\nforward\ntalk\nprocess\nfood\ndebut\nseat\nperformance\ncommittee\nfeatures\ncharacter\narts\nherself\nelse\nlot\nstrong\nrussian\nrange\nhours\npeter\narm\n##da\nmorning\ndr\nsold\n##ry\nquickly\ndirected\n1993\nguitar\nchina\n##w\n31\nlist\n##ma\nperformed\nmedia\nuk\nplayers\nsmile\n##rs\nmyself\n40\nplaced\ncoach\nprovince\ntowards
\nwouldn\nleading\nwhole\nboy\nofficial\ndesigned\ngrand\ncensus\n##el\neurope\nattack\njapanese\nhenry\n1991\n##re\n##os\ncross\ngetting\nalone\naction\nlower\nnetwork\nwide\nwashington\njapan\n1990\nhospital\nbelieve\nchanged\nsister\n##ar\nhold\ngone\nsir\nhadn\nship\n##ka\nstudies\nacademy\nshot\nrights\nbelow\nbase\nbad\ninvolved\nkept\nlargest\n##ist\nbank\nfuture\nespecially\nbeginning\nmark\nmovement\nsection\nfemale\nmagazine\nplan\nprofessor\nlord\nlonger\n##ian\nsat\nwalked\nhill\nactually\ncivil\nenergy\nmodel\nfamilies\nsize\nthus\naircraft\ncompleted\nincludes\ndata\ncaptain\n##or\nfight\nvocals\nfeatured\nrichard\nbridge\nfourth\n1989\nofficer\nstone\nhear\n##ism\nmeans\nmedical\ngroups\nmanagement\nself\nlips\ncompetition\nentire\nlived\ntechnology\nleaving\nfederal\ntournament\nbit\npassed\nhot\nindependent\nawards\nkingdom\nmary\nspent\nfine\ndoesn\nreported\n##ling\njack\nfall\nraised\nitself\nstay\ntrue\nstudio\n1988\nsports\nreplaced\nparis\nsystems\nsaint\nleader\ntheatre\nwhose\nmarket\ncapital\nparents\nspanish\ncanadian\nearth\n##ity\ncut\ndegree\nwriting\nbay\nchristian\nawarded\nnatural\nhigher\nbill\n##as\ncoast\nprovided\nprevious\nsenior\nft\nvalley\norganization\nstopped\nonto\ncountries\nparts\nconference\nqueen\nsecurity\ninterest\nsaying\nallowed\nmaster\nearlier\nphone\nmatter\nsmith\nwinning\ntry\nhappened\nmoving\ncampaign\nlos\n##ley\nbreath\nnearly\nmid\n1987\ncertain\ngirls\ndate\nitalian\nafrican\nstanding\nfell\nartist\n##ted\nshows\ndeal\nmine\nindustry\n1986\n##ng\neveryone\nrepublic\nprovide\ncollection\nlibrary\nstudent\n##ville\nprimary\nowned\nolder\nvia\nheavy\n1st\nmakes\n##able\nattention\nanyone\nafrica\n##ri\nstated\nlength\nended\nfingers\ncommand\nstaff\nskin\nforeign\nopening\ngovernor\nokay\nmedal\nkill\nsun\ncover\njob\n1985\nintroduced\nchest\nhell\nfeeling\n##ies\nsuccess\nmeet\nreason\nstandard\nmeeting\nnovel\n1984\ntrade\nsource\nbuildings\n##land\nrose\nguy\ngoal\n##ur\nchapter\nnative\nhusband\npreviou
sly\nunit\nlimited\nentered\nweeks\nproducer\noperations\nmountain\ntakes\ncovered\nforced\nrelated\nroman\ncomplete\nsuccessful\nkey\ntexas\ncold\n##ya\nchannel\n1980\ntraditional\nfilms\ndance\nclear\napproximately\n500\nnine\nvan\nprince\nquestion\nactive\ntracks\nireland\nregional\nsilver\nauthor\npersonal\nsense\noperation\n##ine\neconomic\n1983\nholding\ntwenty\nisbn\nadditional\nspeed\nhour\nedition\nregular\nhistoric\nplaces\nwhom\nshook\nmovie\nkm²\nsecretary\nprior\nreport\nchicago\nread\nfoundation\nview\nengine\nscored\n1982\nunits\nask\nairport\nproperty\nready\nimmediately\nlady\nmonth\nlisted\ncontract\n##de\nmanager\nthemselves\nlines\n##ki\nnavy\nwriter\nmeant\n##ts\nruns\n##ro\npractice\nchampionships\nsinger\nglass\ncommission\nrequired\nforest\nstarting\nculture\ngenerally\ngiving\naccess\nattended\ntest\ncouple\nstand\ncatholic\nmartin\ncaught\nexecutive\n##less\neye\n##ey\nthinking\nchair\nquite\nshoulder\n1979\nhope\ndecision\nplays\ndefeated\nmunicipality\nwhether\nstructure\noffered\nslowly\npain\nice\ndirection\n##ion\npaper\nmission\n1981\nmostly\n200\nnoted\nindividual\nmanaged\nnature\nlives\nplant\n##ha\nhelped\nexcept\nstudied\ncomputer\nfigure\nrelationship\nissue\nsignificant\nloss\ndie\nsmiled\ngun\nago\nhighest\n1972\n##am\nmale\nbring\ngoals\nmexico\nproblem\ndistance\ncommercial\ncompletely\nlocation\nannual\nfamous\ndrive\n1976\nneck\n1978\nsurface\ncaused\nitaly\nunderstand\ngreek\nhighway\nwrong\nhotel\ncomes\nappearance\njoseph\ndouble\nissues\nmusical\ncompanies\ncastle\nincome\nreview\nassembly\nbass\ninitially\nparliament\nartists\nexperience\n1974\nparticular\nwalk\nfoot\nengineering\ntalking\nwindow\ndropped\n##ter\nmiss\nbaby\nboys\nbreak\n1975\nstars\nedge\nremember\npolicy\ncarried\ntrain\nstadium\nbar\nsex\nangeles\nevidence\n##ge\nbecoming\nassistant\nsoviet\n1977\nupper\nstep\nwing\n1970\nyouth\nfinancial\nreach\n##ll\nactor\nnumerous\n##se\n##st\nnodded\narrived\n##ation\nminute\n##nt\nbelieved\nsorry\ncomplex\nbe
autiful\nvictory\nassociated\ntemple\n1968\n1973\nchance\nperhaps\nmetal\n##son\n1945\nbishop\n##et\nlee\nlaunched\nparticularly\ntree\nle\nretired\nsubject\nprize\ncontains\nyeah\ntheory\nempire\n##ce\nsuddenly\nwaiting\ntrust\nrecording\n##to\nhappy\nterms\ncamp\nchampion\n1971\nreligious\npass\nzealand\nnames\n2nd\nport\nancient\ntom\ncorner\nrepresented\nwatch\nlegal\nanti\njustice\ncause\nwatched\nbrothers\n45\nmaterial\nchanges\nsimply\nresponse\nlouis\nfast\n##ting\nanswer\n60\nhistorical\n1969\nstories\nstraight\ncreate\nfeature\nincreased\nrate\nadministration\nvirginia\nel\nactivities\ncultural\noverall\nwinner\nprograms\nbasketball\nlegs\nguard\nbeyond\ncast\ndoctor\nmm\nflight\nresults\nremains\ncost\neffect\nwinter\n##ble\nlarger\nislands\nproblems\nchairman\ngrew\ncommander\nisn\n1967\npay\nfailed\nselected\nhurt\nfort\nbox\nregiment\nmajority\njournal\n35\nedward\nplans\n##ke\n##ni\nshown\npretty\nirish\ncharacters\ndirectly\nscene\nlikely\noperated\nallow\nspring\n##j\njunior\nmatches\nlooks\nmike\nhouses\nfellow\n##tion\nbeach\nmarriage\n##ham\n##ive\nrules\noil\n65\nflorida\nexpected\nnearby\ncongress\nsam\npeace\nrecent\niii\nwait\nsubsequently\ncell\n##do\nvariety\nserving\nagreed\nplease\npoor\njoe\npacific\nattempt\nwood\ndemocratic\npiece\nprime\n##ca\nrural\nmile\ntouch\nappears\ntownship\n1964\n1966\nsoldiers\n##men\n##ized\n1965\npennsylvania\ncloser\nfighting\nclaimed\nscore\njones\nphysical\neditor\n##ous\nfilled\ngenus\nspecific\nsitting\nsuper\nmom\n##va\ntherefore\nsupported\nstatus\nfear\ncases\nstore\nmeaning\nwales\nminor\nspain\ntower\nfocus\nvice\nfrank\nfollow\nparish\nseparate\ngolden\nhorse\nfifth\nremaining\nbranch\n32\npresented\nstared\n##id\nuses\nsecret\nforms\n##co\nbaseball\nexactly\n##ck\nchoice\nnote\ndiscovered\ntravel\ncomposed\ntruth\nrussia\nball\ncolor\nkiss\ndad\nwind\ncontinue\nring\nreferred\nnumbers\ndigital\ngreater\n##ns\nmetres\nslightly\ndirect\nincrease\n1960\nresponsible\ncrew\nrule\ntrees\ntroops\n##no\
nbroke\ngoes\nindividuals\nhundred\nweight\ncreek\nsleep\nmemory\ndefense\nprovides\nordered\ncode\nvalue\njewish\nwindows\n1944\nsafe\njudge\nwhatever\ncorps\nrealized\ngrowing\npre\n##ga\ncities\nalexander\ngaze\nlies\nspread\nscott\nletter\nshowed\nsituation\nmayor\ntransport\nwatching\nworkers\nextended\n##li\nexpression\nnormal\n##ment\nchart\nmultiple\nborder\n##ba\nhost\n##ner\ndaily\nmrs\nwalls\npiano\n##ko\nheat\ncannot\n##ate\nearned\nproducts\ndrama\nera\nauthority\nseasons\njoin\ngrade\n##io\nsign\ndifficult\nmachine\n1963\nterritory\nmainly\n##wood\nstations\nsquadron\n1962\nstepped\niron\n19th\n##led\nserve\nappear\nsky\nspeak\nbroken\ncharge\nknowledge\nkilometres\nremoved\nships\narticle\ncampus\nsimple\n##ty\npushed\nbritain\n##ve\nleaves\nrecently\ncd\nsoft\nboston\nlatter\neasy\nacquired\npoland\n##sa\nquality\nofficers\npresence\nplanned\nnations\nmass\nbroadcast\njean\nshare\nimage\ninfluence\nwild\noffer\nemperor\nelectric\nreading\nheaded\nability\npromoted\nyellow\nministry\n1942\nthroat\nsmaller\npolitician\n##by\nlatin\nspoke\ncars\nwilliams\nmales\nlack\npop\n80\n##ier\nacting\nseeing\nconsists\n##ti\nestate\n1961\npressure\njohnson\nnewspaper\njr\nchris\nolympics\nonline\nconditions\nbeat\nelements\nwalking\nvote\n##field\nneeds\ncarolina\ntext\nfeaturing\nglobal\nblock\nshirt\nlevels\nfrancisco\npurpose\nfemales\net\ndutch\nduke\nahead\ngas\ntwice\nsafety\nserious\nturning\nhighly\nlieutenant\nfirm\nmaria\namount\nmixed\ndaniel\nproposed\nperfect\nagreement\naffairs\n3rd\nseconds\ncontemporary\npaid\n1943\nprison\nsave\nkitchen\nlabel\nadministrative\nintended\nconstructed\nacademic\nnice\nteacher\nraces\n1956\nformerly\ncorporation\nben\nnation\nissued\nshut\n1958\ndrums\nhousing\nvictoria\nseems\nopera\n1959\ngraduated\nfunction\nvon\nmentioned\npicked\nbuild\nrecognized\nshortly\nprotection\npicture\nnotable\nexchange\nelections\n1980s\nloved\npercent\nracing\nfish\nelizabeth\ngarden\nvolume\nhockey\n1941\nbeside\nsettled\n##ford\n194
0\ncompeted\nreplied\ndrew\n1948\nactress\nmarine\nscotland\nsteel\nglanced\nfarm\nsteve\n1957\nrisk\ntonight\npositive\nmagic\nsingles\neffects\ngray\nscreen\ndog\n##ja\nresidents\nbus\nsides\nnone\nsecondary\nliterature\npolish\ndestroyed\nflying\nfounder\nhouseholds\n1939\nlay\nreserve\nusa\ngallery\n##ler\n1946\nindustrial\nyounger\napproach\nappearances\nurban\nones\n1950\nfinish\navenue\npowerful\nfully\ngrowth\npage\nhonor\njersey\nprojects\nadvanced\nrevealed\nbasic\n90\ninfantry\npair\nequipment\nvisit\n33\nevening\nsearch\ngrant\neffort\nsolo\ntreatment\nburied\nrepublican\nprimarily\nbottom\nowner\n1970s\nisrael\ngives\njim\ndream\nbob\nremain\nspot\n70\nnotes\nproduce\nchampions\ncontact\ned\nsoul\naccepted\nways\ndel\n##ally\nlosing\nsplit\nprice\ncapacity\nbasis\ntrial\nquestions\n##ina\n1955\n20th\nguess\nofficially\nmemorial\nnaval\ninitial\n##ization\nwhispered\nmedian\nengineer\n##ful\nsydney\n##go\ncolumbia\nstrength\n300\n1952\ntears\nsenate\n00\ncard\nasian\nagent\n1947\nsoftware\n44\ndraw\nwarm\nsupposed\ncom\npro\n##il\ntransferred\nleaned\n##at\ncandidate\nescape\nmountains\nasia\npotential\nactivity\nentertainment\nseem\ntraffic\njackson\nmurder\n36\nslow\nproduct\norchestra\nhaven\nagency\nbbc\ntaught\nwebsite\ncomedy\nunable\nstorm\nplanning\nalbums\nrugby\nenvironment\nscientific\ngrabbed\nprotect\n##hi\nboat\ntypically\n1954\n1953\ndamage\nprincipal\ndivided\ndedicated\nmount\nohio\n##berg\npick\nfought\ndriver\n##der\nempty\nshoulders\nsort\nthank\nberlin\nprominent\naccount\nfreedom\nnecessary\nefforts\nalex\nheadquarters\nfollows\nalongside\ndes\nsimon\nandrew\nsuggested\noperating\nlearning\nsteps\n1949\nsweet\ntechnical\nbegin\neasily\n34\nteeth\nspeaking\nsettlement\nscale\n##sh\nrenamed\nray\nmax\nenemy\nsemi\njoint\ncompared\n##rd\nscottish\nleadership\nanalysis\noffers\ngeorgia\npieces\ncaptured\nanimal\ndeputy\nguest\norganized\n##lin\ntony\ncombined\nmethod\nchallenge\n1960s\nhuge\nwants\nbattalion\nsons\nrise\ncrime\ntypes\nf
acilities\ntelling\npath\n1951\nplatform\nsit\n1990s\n##lo\ntells\nassigned\nrich\npull\n##ot\ncommonly\nalive\n##za\nletters\nconcept\nconducted\nwearing\nhappen\nbought\nbecomes\nholy\ngets\nocean\ndefeat\nlanguages\npurchased\ncoffee\noccurred\ntitled\n##q\ndeclared\napplied\nsciences\nconcert\nsounds\njazz\nbrain\n##me\npainting\nfleet\ntax\nnick\n##ius\nmichigan\ncount\nanimals\nleaders\nepisodes\n##line\ncontent\n##den\nbirth\n##it\nclubs\n64\npalace\ncritical\nrefused\nfair\nleg\nlaughed\nreturning\nsurrounding\nparticipated\nformation\nlifted\npointed\nconnected\nrome\nmedicine\nlaid\ntaylor\nsanta\npowers\nadam\ntall\nshared\nfocused\nknowing\nyards\nentrance\nfalls\n##wa\ncalling\n##ad\nsources\nchosen\nbeneath\nresources\nyard\n##ite\nnominated\nsilence\nzone\ndefined\n##que\ngained\nthirty\n38\nbodies\nmoon\n##ard\nadopted\nchristmas\nwidely\nregister\napart\niran\npremier\nserves\ndu\nunknown\nparties\n##les\ngeneration\n##ff\ncontinues\nquick\nfields\nbrigade\nquiet\nteaching\nclothes\nimpact\nweapons\npartner\nflat\ntheater\nsupreme\n1938\n37\nrelations\n##tor\nplants\nsuffered\n1936\nwilson\nkids\nbegins\n##age\n1918\nseats\narmed\ninternet\nmodels\nworth\nlaws\n400\ncommunities\nclasses\nbackground\nknows\nthanks\nquarter\nreaching\nhumans\ncarry\nkilling\nformat\nkong\nhong\nsetting\n75\narchitecture\ndisease\nrailroad\ninc\npossibly\nwish\narthur\nthoughts\nharry\ndoors\ndensity\n##di\ncrowd\nillinois\nstomach\ntone\nunique\nreports\nanyway\n##ir\nliberal\nder\nvehicle\nthick\ndry\ndrug\nfaced\nlargely\nfacility\ntheme\nholds\ncreation\nstrange\ncolonel\n##mi\nrevolution\nbell\npolitics\nturns\nsilent\nrail\nrelief\nindependence\ncombat\nshape\nwrite\ndetermined\nsales\nlearned\n4th\nfinger\noxford\nproviding\n1937\nheritage\nfiction\nsituated\ndesignated\nallowing\ndistribution\nhosted\n##est\nsight\ninterview\nestimated\nreduced\n##ria\ntoronto\nfootballer\nkeeping\nguys\ndamn\nclaim\nmotion\nsport\nsixth\nstayed\n##ze\nen\nrear\nreceive\nhanded
\ntwelve\ndress\naudience\ngranted\nbrazil\n##well\nspirit\n##ated\nnoticed\netc\nolympic\nrepresentative\neric\ntight\ntrouble\nreviews\ndrink\nvampire\nmissing\nroles\nranked\nnewly\nhousehold\nfinals\nwave\ncritics\n##ee\nphase\nmassachusetts\npilot\nunlike\nphiladelphia\nbright\nguns\ncrown\norganizations\nroof\n42\nrespectively\nclearly\ntongue\nmarked\ncircle\nfox\nkorea\nbronze\nbrian\nexpanded\nsexual\nsupply\nyourself\ninspired\nlabour\nfc\n##ah\nreference\nvision\ndraft\nconnection\nbrand\nreasons\n1935\nclassic\ndriving\ntrip\njesus\ncells\nentry\n1920\nneither\ntrail\nclaims\natlantic\norders\nlabor\nnose\nafraid\nidentified\nintelligence\ncalls\ncancer\nattacked\npassing\nstephen\npositions\nimperial\ngrey\njason\n39\nsunday\n48\nswedish\navoid\nextra\nuncle\nmessage\ncovers\nallows\nsurprise\nmaterials\nfame\nhunter\n##ji\n1930\ncitizens\nfigures\ndavis\nenvironmental\nconfirmed\nshit\ntitles\ndi\nperforming\ndifference\nacts\nattacks\n##ov\nexisting\nvotes\nopportunity\nnor\nshop\nentirely\ntrains\nopposite\npakistan\n##pa\ndevelop\nresulted\nrepresentatives\nactions\nreality\npressed\n##ish\nbarely\nwine\nconversation\nfaculty\nnorthwest\nends\ndocumentary\nnuclear\nstock\ngrace\nsets\neat\nalternative\n##ps\nbag\nresulting\ncreating\nsurprised\ncemetery\n1919\ndrop\nfinding\nsarah\ncricket\nstreets\ntradition\nride\n1933\nexhibition\ntarget\near\nexplained\nrain\ncomposer\ninjury\napartment\nmunicipal\neducational\noccupied\nnetherlands\nclean\nbillion\nconstitution\nlearn\n1914\nmaximum\nclassical\nfrancis\nlose\nopposition\njose\nontario\nbear\ncore\nhills\nrolled\nending\ndrawn\npermanent\nfun\n##tes\n##lla\nlewis\nsites\nchamber\nryan\n##way\nscoring\nheight\n1934\n##house\nlyrics\nstaring\n55\nofficials\n1917\nsnow\noldest\n##tic\norange\n##ger\nqualified\ninterior\napparently\nsucceeded\nthousand\ndinner\nlights\nexistence\nfans\nheavily\n41\ngreatest\nconservative\nsend\nbowl\nplus\nenter\ncatch\n##un\neconomy\nduty\n1929\nspeech\nauthorities
\nprincess\nperformances\nversions\nshall\ngraduate\npictures\neffective\nremembered\npoetry\ndesk\ncrossed\nstarring\nstarts\npassenger\nsharp\n##ant\nacres\nass\nweather\nfalling\nrank\nfund\nsupporting\ncheck\nadult\npublishing\nheads\ncm\nsoutheast\nlane\n##burg\napplication\nbc\n##ura\nles\ncondition\ntransfer\nprevent\ndisplay\nex\nregions\nearl\nfederation\ncool\nrelatively\nanswered\nbesides\n1928\nobtained\nportion\n##town\nmix\n##ding\nreaction\nliked\ndean\nexpress\npeak\n1932\n##tte\ncounter\nreligion\nchain\nrare\nmiller\nconvention\naid\nlie\nvehicles\nmobile\nperform\nsquad\nwonder\nlying\ncrazy\nsword\n##ping\nattempted\ncenturies\nweren\nphilosophy\ncategory\n##ize\nanna\ninterested\n47\nsweden\nwolf\nfrequently\nabandoned\nkg\nliterary\nalliance\ntask\nentitled\n##ay\nthrew\npromotion\nfactory\ntiny\nsoccer\nvisited\nmatt\nfm\nachieved\n52\ndefence\ninternal\npersian\n43\nmethods\n##ging\narrested\notherwise\ncambridge\nprogramming\nvillages\nelementary\ndistricts\nrooms\ncriminal\nconflict\nworry\ntrained\n1931\nattempts\nwaited\nsignal\nbird\ntruck\nsubsequent\nprogramme\n##ol\nad\n49\ncommunist\ndetails\nfaith\nsector\npatrick\ncarrying\nlaugh\n##ss\ncontrolled\nkorean\nshowing\norigin\nfuel\nevil\n1927\n##ent\nbrief\nidentity\ndarkness\naddress\npool\nmissed\npublication\nweb\nplanet\nian\nanne\nwings\ninvited\n##tt\nbriefly\nstandards\nkissed\n##be\nideas\nclimate\ncausing\nwalter\nworse\nalbert\narticles\nwinners\ndesire\naged\nnortheast\ndangerous\ngate\ndoubt\n1922\nwooden\nmulti\n##ky\npoet\nrising\nfunding\n46\ncommunications\ncommunication\nviolence\ncopies\nprepared\nford\ninvestigation\nskills\n1924\npulling\nelectronic\n##ak\n##ial\n##han\ncontaining\nultimately\noffices\nsinging\nunderstanding\nrestaurant\ntomorrow\nfashion\nchrist\nward\nda\npope\nstands\n5th\nflow\nstudios\naired\ncommissioned\ncontained\nexist\nfresh\namericans\n##per\nwrestling\napproved\nkid\nemployed\nrespect\nsuit\n1925\nangel\nasking\nincreasing\nframe\nangry
\nselling\n1950s\nthin\nfinds\n##nd\ntemperature\nstatement\nali\nexplain\ninhabitants\ntowns\nextensive\nnarrow\n51\njane\nflowers\nimages\npromise\nsomewhere\nobject\nfly\nclosely\n##ls\n1912\nbureau\ncape\n1926\nweekly\npresidential\nlegislative\n1921\n##ai\n##au\nlaunch\nfounding\n##ny\n978\n##ring\nartillery\nstrike\nun\ninstitutions\nroll\nwriters\nlanding\nchose\nkevin\nanymore\npp\n##ut\nattorney\nfit\ndan\nbillboard\nreceiving\nagricultural\nbreaking\nsought\ndave\nadmitted\nlands\nmexican\n##bury\ncharlie\nspecifically\nhole\niv\nhoward\ncredit\nmoscow\nroads\naccident\n1923\nproved\nwear\nstruck\nhey\nguards\nstuff\nslid\nexpansion\n1915\ncat\nanthony\n##kin\nmelbourne\nopposed\nsub\nsouthwest\narchitect\nfailure\nplane\n1916\n##ron\nmap\ncamera\ntank\nlisten\nregarding\nwet\nintroduction\nmetropolitan\nlink\nep\nfighter\ninch\ngrown\ngene\nanger\nfixed\nbuy\ndvd\nkhan\ndomestic\nworldwide\nchapel\nmill\nfunctions\nexamples\n##head\ndeveloping\n1910\nturkey\nhits\npocket\nantonio\npapers\ngrow\nunless\ncircuit\n18th\nconcerned\nattached\njournalist\nselection\njourney\nconverted\nprovincial\npainted\nhearing\naren\nbands\nnegative\naside\nwondered\nknight\nlap\nsurvey\nma\n##ow\nnoise\nbilly\n##ium\nshooting\nguide\nbedroom\npriest\nresistance\nmotor\nhomes\nsounded\ngiant\n##mer\n150\nscenes\nequal\ncomic\npatients\nhidden\nsolid\nactual\nbringing\nafternoon\ntouched\nfunds\nwedding\nconsisted\nmarie\ncanal\nsr\nkim\ntreaty\nturkish\nrecognition\nresidence\ncathedral\nbroad\nknees\nincident\nshaped\nfired\nnorwegian\nhandle\ncheek\ncontest\nrepresent\n##pe\nrepresenting\nbeauty\n##sen\nbirds\nadvantage\nemergency\nwrapped\ndrawing\nnotice\npink\nbroadcasting\n##ong\nsomehow\nbachelor\nseventh\ncollected\nregistered\nestablishment\nalan\nassumed\nchemical\npersonnel\nroger\nretirement\njeff\nportuguese\nwore\ntied\ndevice\nthreat\nprogress\nadvance\n##ised\nbanks\nhired\nmanchester\nnfl\nteachers\nstructures\nforever\n##bo\ntennis\nhelping\nsaturday\nsale
\napplications\njunction\nhip\nincorporated\nneighborhood\ndressed\nceremony\n##ds\ninfluenced\nhers\nvisual\nstairs\ndecades\ninner\nkansas\nhung\nhoped\ngain\nscheduled\ndowntown\nengaged\naustria\nclock\nnorway\ncertainly\npale\nprotected\n1913\nvictor\nemployees\nplate\nputting\nsurrounded\n##ists\nfinishing\nblues\ntropical\n##ries\nminnesota\nconsider\nphilippines\naccept\n54\nretrieved\n1900\nconcern\nanderson\nproperties\ninstitution\ngordon\nsuccessfully\nvietnam\n##dy\nbacking\noutstanding\nmuslim\ncrossing\nfolk\nproducing\nusual\ndemand\noccurs\nobserved\nlawyer\neducated\n##ana\nkelly\nstring\npleasure\nbudget\nitems\nquietly\ncolorado\nphilip\ntypical\n##worth\nderived\n600\nsurvived\nasks\nmental\n##ide\n56\njake\njews\ndistinguished\nltd\n1911\nsri\nextremely\n53\nathletic\nloud\nthousands\nworried\nshadow\ntransportation\nhorses\nweapon\narena\nimportance\nusers\ntim\nobjects\ncontributed\ndragon\ndouglas\naware\nsenator\njohnny\njordan\nsisters\nengines\nflag\ninvestment\nsamuel\nshock\ncapable\nclark\nrow\nwheel\nrefers\nsession\nfamiliar\nbiggest\nwins\nhate\nmaintained\ndrove\nhamilton\nrequest\nexpressed\ninjured\nunderground\nchurches\nwalker\nwars\ntunnel\npasses\nstupid\nagriculture\nsoftly\ncabinet\nregarded\njoining\nindiana\n##ea\n##ms\npush\ndates\nspend\nbehavior\nwoods\nprotein\ngently\nchase\nmorgan\nmention\nburning\nwake\ncombination\noccur\nmirror\nleads\njimmy\nindeed\nimpossible\nsingapore\npaintings\ncovering\n##nes\nsoldier\nlocations\nattendance\nsell\nhistorian\nwisconsin\ninvasion\nargued\npainter\ndiego\nchanging\negypt\n##don\nexperienced\ninches\n##ku\nmissouri\nvol\ngrounds\nspoken\nswitzerland\n##gan\nreform\nrolling\nha\nforget\nmassive\nresigned\nburned\nallen\ntennessee\nlocked\nvalues\nimproved\n##mo\nwounded\nuniverse\nsick\ndating\nfacing\npack\npurchase\nuser\n##pur\nmoments\n##ul\nmerged\nanniversary\n1908\ncoal\nbrick\nunderstood\ncauses\ndynasty\nqueensland\nestablish\nstores\ncrisis\npromote\nhoping\nviews\nc
ards\nreferee\nextension\n##si\nraise\narizona\nimprove\ncolonial\nformal\ncharged\n##rt\npalm\nlucky\nhide\nrescue\nfaces\n95\nfeelings\ncandidates\njuan\n##ell\ngoods\n6th\ncourses\nweekend\n59\nluke\ncash\nfallen\n##om\ndelivered\naffected\ninstalled\ncarefully\ntries\nswiss\nhollywood\ncosts\nlincoln\nresponsibility\n##he\nshore\nfile\nproper\nnormally\nmaryland\nassistance\njump\nconstant\noffering\nfriendly\nwaters\npersons\nrealize\ncontain\ntrophy\n800\npartnership\nfactor\n58\nmusicians\ncry\nbound\noregon\nindicated\nhero\nhouston\nmedium\n##ure\nconsisting\nsomewhat\n##ara\n57\ncycle\n##che\nbeer\nmoore\nfrederick\ngotten\neleven\nworst\nweak\napproached\narranged\nchin\nloan\nuniversal\nbond\nfifteen\npattern\ndisappeared\n##ney\ntranslated\n##zed\nlip\narab\ncapture\ninterests\ninsurance\n##chi\nshifted\ncave\nprix\nwarning\nsections\ncourts\ncoat\nplot\nsmell\nfeed\ngolf\nfavorite\nmaintain\nknife\nvs\nvoted\ndegrees\nfinance\nquebec\nopinion\ntranslation\nmanner\nruled\noperate\nproductions\nchoose\nmusician\ndiscovery\nconfused\ntired\nseparated\nstream\ntechniques\ncommitted\nattend\nranking\nkings\nthrow\npassengers\nmeasure\nhorror\nfan\nmining\nsand\ndanger\nsalt\ncalm\ndecade\ndam\nrequire\nrunner\n##ik\nrush\nassociate\ngreece\n##ker\nrivers\nconsecutive\nmatthew\n##ski\nsighed\nsq\ndocuments\nsteam\nedited\nclosing\ntie\naccused\n1905\n##ini\nislamic\ndistributed\ndirectors\norganisation\nbruce\n7th\nbreathing\nmad\nlit\narrival\nconcrete\ntaste\n08\ncomposition\nshaking\nfaster\namateur\nadjacent\nstating\n1906\ntwin\nflew\n##ran\ntokyo\npublications\n##tone\nobviously\nridge\nstorage\n1907\ncarl\npages\nconcluded\ndesert\ndriven\nuniversities\nages\nterminal\nsequence\nborough\n250\nconstituency\ncreative\ncousin\neconomics\ndreams\nmargaret\nnotably\nreduce\nmontreal\nmode\n17th\nears\nsaved\njan\nvocal\n##ica\n1909\nandy\n##jo\nriding\nroughly\nthreatened\n##ise\nmeters\nmeanwhile\nlanded\ncompete\nrepeated\ngrass\nczech\nregularly\ncharge
s\ntea\nsudden\nappeal\n##ung\nsolution\ndescribes\npierre\nclassification\nglad\nparking\n##ning\nbelt\nphysics\n99\nrachel\nadd\nhungarian\nparticipate\nexpedition\ndamaged\ngift\nchildhood\n85\nfifty\n##red\nmathematics\njumped\nletting\ndefensive\nmph\n##ux\n##gh\ntesting\n##hip\nhundreds\nshoot\nowners\nmatters\nsmoke\nisraeli\nkentucky\ndancing\nmounted\ngrandfather\nemma\ndesigns\nprofit\nargentina\n##gs\ntruly\nli\nlawrence\ncole\nbegun\ndetroit\nwilling\nbranches\nsmiling\ndecide\nmiami\nenjoyed\nrecordings\n##dale\npoverty\nethnic\ngay\n##bi\ngary\narabic\n09\naccompanied\n##one\n##ons\nfishing\ndetermine\nresidential\nacid\n##ary\nalice\nreturns\nstarred\nmail\n##ang\njonathan\nstrategy\n##ue\nnet\nforty\ncook\nbusinesses\nequivalent\ncommonwealth\ndistinct\nill\n##cy\nseriously\n##ors\n##ped\nshift\nharris\nreplace\nrio\nimagine\nformula\nensure\n##ber\nadditionally\nscheme\nconservation\noccasionally\npurposes\nfeels\nfavor\n##and\n##ore\n1930s\ncontrast\nhanging\nhunt\nmovies\n1904\ninstruments\nvictims\ndanish\nchristopher\nbusy\ndemon\nsugar\nearliest\ncolony\nstudying\nbalance\nduties\n##ks\nbelgium\nslipped\ncarter\n05\nvisible\nstages\niraq\nfifa\n##im\ncommune\nforming\nzero\n07\ncontinuing\ntalked\ncounties\nlegend\nbathroom\noption\ntail\nclay\ndaughters\nafterwards\nsevere\njaw\nvisitors\n##ded\ndevices\naviation\nrussell\nkate\n##vi\nentering\nsubjects\n##ino\ntemporary\nswimming\nforth\nsmooth\nghost\naudio\nbush\noperates\nrocks\nmovements\nsigns\neddie\n##tz\nann\nvoices\nhonorary\n06\nmemories\ndallas\npure\nmeasures\nracial\npromised\n66\nharvard\nceo\n16th\nparliamentary\nindicate\nbenefit\nflesh\ndublin\nlouisiana\n1902\n1901\npatient\nsleeping\n1903\nmembership\ncoastal\nmedieval\nwanting\nelement\nscholars\nrice\n62\nlimit\nsurvive\nmakeup\nrating\ndefinitely\ncollaboration\nobvious\n##tan\nboss\nms\nbaron\nbirthday\nlinked\nsoil\ndiocese\n##lan\nncaa\n##mann\noffensive\nshell\nshouldn\nwaist\n##tus\nplain\nross\norgan\nresolution\nm
anufacturing\nadding\nrelative\nkennedy\n98\nwhilst\nmoth\nmarketing\ngardens\ncrash\n72\nheading\npartners\ncredited\ncarlos\nmoves\ncable\n##zi\nmarshall\n##out\ndepending\nbottle\nrepresents\nrejected\nresponded\nexisted\n04\njobs\ndenmark\nlock\n##ating\ntreated\ngraham\nroutes\ntalent\ncommissioner\ndrugs\nsecure\ntests\nreign\nrestored\nphotography\n##gi\ncontributions\noklahoma\ndesigner\ndisc\ngrin\nseattle\nrobin\npaused\natlanta\nunusual\n##gate\npraised\nlas\nlaughing\nsatellite\nhungary\nvisiting\n##sky\ninteresting\nfactors\ndeck\npoems\nnorman\n##water\nstuck\nspeaker\nrifle\ndomain\npremiered\n##her\ndc\ncomics\nactors\n01\nreputation\neliminated\n8th\nceiling\nprisoners\nscript\n##nce\nleather\naustin\nmississippi\nrapidly\nadmiral\nparallel\ncharlotte\nguilty\ntools\ngender\ndivisions\nfruit\n##bs\nlaboratory\nnelson\nfantasy\nmarry\nrapid\naunt\ntribe\nrequirements\naspects\nsuicide\namongst\nadams\nbone\nukraine\nabc\nkick\nsees\nedinburgh\nclothing\ncolumn\nrough\ngods\nhunting\nbroadway\ngathered\nconcerns\n##ek\nspending\nty\n12th\nsnapped\nrequires\nsolar\nbones\ncavalry\n##tta\niowa\ndrinking\nwaste\nindex\nfranklin\ncharity\nthompson\nstewart\ntip\nflash\nlandscape\nfriday\nenjoy\nsingh\npoem\nlistening\n##back\neighth\nfred\ndifferences\nadapted\nbomb\nukrainian\nsurgery\ncorporate\nmasters\nanywhere\n##more\nwaves\nodd\nsean\nportugal\norleans\ndick\ndebate\nkent\neating\npuerto\ncleared\n96\nexpect\ncinema\n97\nguitarist\nblocks\nelectrical\nagree\ninvolving\ndepth\ndying\npanel\nstruggle\n##ged\npeninsula\nadults\nnovels\nemerged\nvienna\nmetro\ndebuted\nshoes\ntamil\nsongwriter\nmeets\nprove\nbeating\ninstance\nheaven\nscared\nsending\nmarks\nartistic\npassage\nsuperior\n03\nsignificantly\nshopping\n##tive\nretained\n##izing\nmalaysia\ntechnique\ncheeks\n##ola\nwarren\nmaintenance\ndestroy\nextreme\nallied\n120\nappearing\n##yn\nfill\nadvice\nalabama\nqualifying\npolicies\ncleveland\nhat\nbattery\nsmart\nauthors\n10th\nsoundtrack\nacted
\ndated\nlb\nglance\nequipped\ncoalition\nfunny\nouter\nambassador\nroy\npossibility\ncouples\ncampbell\ndna\nloose\nethan\nsupplies\n1898\ngonna\n88\nmonster\n##res\nshake\nagents\nfrequency\nsprings\ndogs\npractices\n61\ngang\nplastic\neasier\nsuggests\ngulf\nblade\nexposed\ncolors\nindustries\nmarkets\npan\nnervous\nelectoral\ncharts\nlegislation\nownership\n##idae\nmac\nappointment\nshield\ncopy\nassault\nsocialist\nabbey\nmonument\nlicense\nthrone\nemployment\njay\n93\nreplacement\ncharter\ncloud\npowered\nsuffering\naccounts\noak\nconnecticut\nstrongly\nwright\ncolour\ncrystal\n13th\ncontext\nwelsh\nnetworks\nvoiced\ngabriel\njerry\n##cing\nforehead\nmp\n##ens\nmanage\nschedule\ntotally\nremix\n##ii\nforests\noccupation\nprint\nnicholas\nbrazilian\nstrategic\nvampires\nengineers\n76\nroots\nseek\ncorrect\ninstrumental\nund\nalfred\nbacked\nhop\n##des\nstanley\nrobinson\ntraveled\nwayne\nwelcome\naustrian\nachieve\n67\nexit\nrates\n1899\nstrip\nwhereas\n##cs\nsing\ndeeply\nadventure\nbobby\nrick\njamie\ncareful\ncomponents\ncap\nuseful\npersonality\nknee\n##shi\npushing\nhosts\n02\nprotest\nca\nottoman\nsymphony\n##sis\n63\nboundary\n1890\nprocesses\nconsidering\nconsiderable\ntons\n##work\n##ft\n##nia\ncooper\ntrading\ndear\nconduct\n91\nillegal\napple\nrevolutionary\nholiday\ndefinition\nharder\n##van\njacob\ncircumstances\ndestruction\n##lle\npopularity\ngrip\nclassified\nliverpool\ndonald\nbaltimore\nflows\nseeking\nhonour\napproval\n92\nmechanical\ntill\nhappening\nstatue\ncritic\nincreasingly\nimmediate\ndescribe\ncommerce\nstare\n##ster\nindonesia\nmeat\nrounds\nboats\nbaker\northodox\ndepression\nformally\nworn\nnaked\nclaire\nmuttered\nsentence\n11th\nemily\ndocument\n77\ncriticism\nwished\nvessel\nspiritual\nbent\nvirgin\nparker\nminimum\nmurray\nlunch\ndanny\nprinted\ncompilation\nkeyboards\nfalse\nblow\nbelonged\n68\nraising\n78\ncutting\n##board\npittsburgh\n##up\n9th\nshadows\n81\nhated\nindigenous\njon\n15th\nbarry\nscholar\nah\n##zer\noliver\n##
gy\nstick\nsusan\nmeetings\nattracted\nspell\nromantic\n##ver\nye\n1895\nphoto\ndemanded\ncustomers\n##ac\n1896\nlogan\nrevival\nkeys\nmodified\ncommanded\njeans\n##ious\nupset\nraw\nphil\ndetective\nhiding\nresident\nvincent\n##bly\nexperiences\ndiamond\ndefeating\ncoverage\nlucas\nexternal\nparks\nfranchise\nhelen\nbible\nsuccessor\npercussion\ncelebrated\nil\nlift\nprofile\nclan\nromania\n##ied\nmills\n##su\nnobody\nachievement\nshrugged\nfault\n1897\nrhythm\ninitiative\nbreakfast\ncarbon\n700\n69\nlasted\nviolent\n74\nwound\nken\nkiller\ngradually\nfilmed\n°c\ndollars\nprocessing\n94\nremove\ncriticized\nguests\nsang\nchemistry\n##vin\nlegislature\ndisney\n##bridge\nuniform\nescaped\nintegrated\nproposal\npurple\ndenied\nliquid\nkarl\ninfluential\nmorris\nnights\nstones\nintense\nexperimental\ntwisted\n71\n84\n##ld\npace\nnazi\nmitchell\nny\nblind\nreporter\nnewspapers\n14th\ncenters\nburn\nbasin\nforgotten\nsurviving\nfiled\ncollections\nmonastery\nlosses\nmanual\ncouch\ndescription\nappropriate\nmerely\ntag\nmissions\nsebastian\nrestoration\nreplacing\ntriple\n73\nelder\njulia\nwarriors\nbenjamin\njulian\nconvinced\nstronger\namazing\ndeclined\nversus\nmerchant\nhappens\noutput\nfinland\nbare\nbarbara\nabsence\nignored\ndawn\ninjuries\n##port\nproducers\n##ram\n82\nluis\n##ities\nkw\nadmit\nexpensive\nelectricity\nnba\nexception\nsymbol\n##ving\nladies\nshower\nsheriff\ncharacteristics\n##je\naimed\nbutton\nratio\neffectively\nsummit\nangle\njury\nbears\nfoster\nvessels\npants\nexecuted\nevans\ndozen\nadvertising\nkicked\npatrol\n1889\ncompetitions\nlifetime\nprinciples\nathletics\n##logy\nbirmingham\nsponsored\n89\nrob\nnomination\n1893\nacoustic\n##sm\ncreature\nlongest\n##tra\ncredits\nharbor\ndust\njosh\n##so\nterritories\nmilk\ninfrastructure\ncompletion\nthailand\nindians\nleon\narchbishop\n##sy\nassist\npitch\nblake\narrangement\ngirlfriend\nserbian\noperational\nhence\nsad\nscent\nfur\ndj\nsessions\nhp\nrefer\nrarely\n##ora\nexists\n1892\n##ten\nscient
ists\ndirty\npenalty\nburst\nportrait\nseed\n79\npole\nlimits\nrival\n1894\nstable\nalpha\ngrave\nconstitutional\nalcohol\narrest\nflower\nmystery\ndevil\narchitectural\nrelationships\ngreatly\nhabitat\n##istic\nlarry\nprogressive\nremote\ncotton\n##ics\n##ok\npreserved\nreaches\n##ming\ncited\n86\nvast\nscholarship\ndecisions\ncbs\njoy\nteach\n1885\neditions\nknocked\neve\nsearching\npartly\nparticipation\ngap\nanimated\nfate\nexcellent\n##ett\nna\n87\nalternate\nsaints\nyoungest\n##ily\nclimbed\n##ita\n##tors\nsuggest\n##ct\ndiscussion\nstaying\nchoir\nlakes\njacket\nrevenue\nnevertheless\npeaked\ninstrument\nwondering\nannually\nmanaging\nneil\n1891\nsigning\nterry\n##ice\napply\nclinical\nbrooklyn\naim\ncatherine\nfuck\nfarmers\nfigured\nninth\npride\nhugh\nevolution\nordinary\ninvolvement\ncomfortable\nshouted\ntech\nencouraged\ntaiwan\nrepresentation\nsharing\n##lia\n##em\npanic\nexact\ncargo\ncompeting\nfat\ncried\n83\n1920s\noccasions\npa\ncabin\nborders\nutah\nmarcus\n##isation\nbadly\nmuscles\n##ance\nvictorian\ntransition\nwarner\nbet\npermission\n##rin\nslave\nterrible\nsimilarly\nshares\nseth\nuefa\npossession\nmedals\nbenefits\ncolleges\nlowered\nperfectly\nmall\ntransit\n##ye\n##kar\npublisher\n##ened\nharrison\ndeaths\nelevation\n##ae\nasleep\nmachines\nsigh\nash\nhardly\nargument\noccasion\nparent\nleo\ndecline\n1888\ncontribution\n##ua\nconcentration\n1000\nopportunities\nhispanic\nguardian\nextent\nemotions\nhips\nmason\nvolumes\nbloody\ncontroversy\ndiameter\nsteady\nmistake\nphoenix\nidentify\nviolin\n##sk\ndeparture\nrichmond\nspin\nfuneral\nenemies\n1864\ngear\nliterally\nconnor\nrandom\nsergeant\ngrab\nconfusion\n1865\ntransmission\ninformed\nop\nleaning\nsacred\nsuspended\nthinks\ngates\nportland\nluck\nagencies\nyours\nhull\nexpert\nmuscle\nlayer\npractical\nsculpture\njerusalem\nlatest\nlloyd\nstatistics\ndeeper\nrecommended\nwarrior\narkansas\nmess\nsupports\ngreg\neagle\n1880\nrecovered\nrated\nconcerts\nrushed\n##ano\nstops\neggs\nfiles
\npremiere\nkeith\n##vo\ndelhi\nturner\npit\naffair\nbelief\npaint\n##zing\nmate\n##ach\n##ev\nvictim\n##ology\nwithdrew\nbonus\nstyles\nfled\n##ud\nglasgow\ntechnologies\nfunded\nnbc\nadaptation\n##ata\nportrayed\ncooperation\nsupporters\njudges\nbernard\njustin\nhallway\nralph\n##ick\ngraduating\ncontroversial\ndistant\ncontinental\nspider\nbite\n##ho\nrecognize\nintention\nmixing\n##ese\negyptian\nbow\ntourism\nsuppose\nclaiming\ntiger\ndominated\nparticipants\nvi\n##ru\nnurse\npartially\ntape\n##rum\npsychology\n##rn\nessential\ntouring\nduo\nvoting\ncivilian\nemotional\nchannels\n##king\napparent\nhebrew\n1887\ntommy\ncarrier\nintersection\nbeast\nhudson\n##gar\n##zo\nlab\nnova\nbench\ndiscuss\ncosta\n##ered\ndetailed\nbehalf\ndrivers\nunfortunately\nobtain\n##lis\nrocky\n##dae\nsiege\nfriendship\nhoney\n##rian\n1861\namy\nhang\nposted\ngovernments\ncollins\nrespond\nwildlife\npreferred\noperator\n##po\nlaura\npregnant\nvideos\ndennis\nsuspected\nboots\ninstantly\nweird\nautomatic\nbusinessman\nalleged\nplacing\nthrowing\nph\nmood\n1862\nperry\nvenue\njet\nremainder\n##lli\n##ci\npassion\nbiological\nboyfriend\n1863\ndirt\nbuffalo\nron\nsegment\nfa\nabuse\n##era\ngenre\nthrown\nstroke\ncolored\nstress\nexercise\ndisplayed\n##gen\nstruggled\n##tti\nabroad\ndramatic\nwonderful\nthereafter\nmadrid\ncomponent\nwidespread\n##sed\ntale\ncitizen\ntodd\nmonday\n1886\nvancouver\noverseas\nforcing\ncrying\ndescent\n##ris\ndiscussed\nsubstantial\nranks\nregime\n1870\nprovinces\nswitch\ndrum\nzane\nted\ntribes\nproof\nlp\ncream\nresearchers\nvolunteer\nmanor\nsilk\nmilan\ndonated\nallies\nventure\nprinciple\ndelivery\nenterprise\n##ves\n##ans\nbars\ntraditionally\nwitch\nreminded\ncopper\n##uk\npete\ninter\nlinks\ncolin\ngrinned\nelsewhere\ncompetitive\nfrequent\n##oy\nscream\n##hu\ntension\ntexts\nsubmarine\nfinnish\ndefending\ndefend\npat\ndetail\n1884\naffiliated\nstuart\nthemes\nvilla\nperiods\ntool\nbelgian\nruling\ncrimes\nanswers\nfolded\nlicensed\nresort\ndemolishe
d\nhans\nlucy\n1881\nlion\ntraded\nphotographs\nwrites\ncraig\n##fa\ntrials\ngenerated\nbeth\nnoble\ndebt\npercentage\nyorkshire\nerected\nss\nviewed\ngrades\nconfidence\nceased\nislam\ntelephone\nretail\n##ible\nchile\nm²\nroberts\nsixteen\n##ich\ncommented\nhampshire\ninnocent\ndual\npounds\nchecked\nregulations\nafghanistan\nsung\nrico\nliberty\nassets\nbigger\noptions\nangels\nrelegated\ntribute\nwells\nattending\nleaf\n##yan\nbutler\nromanian\nforum\nmonthly\nlisa\npatterns\ngmina\n##tory\nmadison\nhurricane\nrev\n##ians\nbristol\n##ula\nelite\nvaluable\ndisaster\ndemocracy\nawareness\ngermans\nfreyja\n##ins\nloop\nabsolutely\npaying\npopulations\nmaine\nsole\nprayer\nspencer\nreleases\ndoorway\nbull\n##ani\nlover\nmidnight\nconclusion\n##sson\nthirteen\nlily\nmediterranean\n##lt\nnhl\nproud\nsample\n##hill\ndrummer\nguinea\n##ova\nmurphy\nclimb\n##ston\ninstant\nattributed\nhorn\nain\nrailways\nsteven\n##ao\nautumn\nferry\nopponent\nroot\ntraveling\nsecured\ncorridor\nstretched\ntales\nsheet\ntrinity\ncattle\nhelps\nindicates\nmanhattan\nmurdered\nfitted\n1882\ngentle\ngrandmother\nmines\nshocked\nvegas\nproduces\n##light\ncaribbean\n##ou\nbelong\ncontinuous\ndesperate\ndrunk\nhistorically\ntrio\nwaved\nraf\ndealing\nnathan\nbat\nmurmured\ninterrupted\nresiding\nscientist\npioneer\nharold\naaron\n##net\ndelta\nattempting\nminority\nmini\nbelieves\nchorus\ntend\nlots\neyed\nindoor\nload\nshots\nupdated\njail\n##llo\nconcerning\nconnecting\nwealth\n##ved\nslaves\narrive\nrangers\nsufficient\nrebuilt\n##wick\ncardinal\nflood\nmuhammad\nwhenever\nrelation\nrunners\nmoral\nrepair\nviewers\narriving\nrevenge\npunk\nassisted\nbath\nfairly\nbreathe\nlists\ninnings\nillustrated\nwhisper\nnearest\nvoters\nclinton\nties\nultimate\nscreamed\nbeijing\nlions\nandre\nfictional\ngathering\ncomfort\nradar\nsuitable\ndismissed\nhms\nban\npine\nwrist\natmosphere\nvoivodeship\nbid\ntimber\n##ned\n##nan\ngiants\n##ane\ncameron\nrecovery\nuss\nidentical\ncategories\nswitched\nserbi
a\nlaughter\nnoah\nensemble\ntherapy\npeoples\ntouching\n##off\nlocally\npearl\nplatforms\neverywhere\nballet\ntables\nlanka\nherbert\noutdoor\ntoured\nderek\n1883\nspaces\ncontested\nswept\n1878\nexclusive\nslight\nconnections\n##dra\nwinds\nprisoner\ncollective\nbangladesh\ntube\npublicly\nwealthy\nthai\n##ys\nisolated\nselect\n##ric\ninsisted\npen\nfortune\nticket\nspotted\nreportedly\nanimation\nenforcement\ntanks\n110\ndecides\nwider\nlowest\nowen\n##time\nnod\nhitting\n##hn\ngregory\nfurthermore\nmagazines\nfighters\nsolutions\n##ery\npointing\nrequested\nperu\nreed\nchancellor\nknights\nmask\nworker\neldest\nflames\nreduction\n1860\nvolunteers\n##tis\nreporting\n##hl\nwire\nadvisory\nendemic\norigins\nsettlers\npursue\nknock\nconsumer\n1876\neu\ncompound\ncreatures\nmansion\nsentenced\nivan\ndeployed\nguitars\nfrowned\ninvolves\nmechanism\nkilometers\nperspective\nshops\nmaps\nterminus\nduncan\nalien\nfist\nbridges\n##pers\nheroes\nfed\nderby\nswallowed\n##ros\npatent\nsara\nillness\ncharacterized\nadventures\nslide\nhawaii\njurisdiction\n##op\norganised\n##side\nadelaide\nwalks\nbiology\nse\n##ties\nrogers\nswing\ntightly\nboundaries\n##rie\nprepare\nimplementation\nstolen\n##sha\ncertified\ncolombia\nedwards\ngarage\n##mm\nrecalled\n##ball\nrage\nharm\nnigeria\nbreast\n##ren\nfurniture\npupils\nsettle\n##lus\ncuba\nballs\nclient\nalaska\n21st\nlinear\nthrust\ncelebration\nlatino\ngenetic\nterror\n##cia\n##ening\nlightning\nfee\nwitness\nlodge\nestablishing\nskull\n##ique\nearning\nhood\n##ei\nrebellion\nwang\nsporting\nwarned\nmissile\ndevoted\nactivist\nporch\nworship\nfourteen\npackage\n1871\ndecorated\n##shire\nhoused\n##ock\nchess\nsailed\ndoctors\noscar\njoan\ntreat\ngarcia\nharbour\njeremy\n##ire\ntraditions\ndominant\njacques\n##gon\n##wan\nrelocated\n1879\namendment\nsized\ncompanion\nsimultaneously\nvolleyball\nspun\nacre\nincreases\nstopping\nloves\nbelongs\naffect\ndrafted\ntossed\nscout\nbattles\n1875\nfilming\nshoved\nmunich\ntenure\nvertical\n
romance\npc\n##cher\nargue\n##ical\ncraft\nranging\nwww\nopens\nhonest\ntyler\nyesterday\nvirtual\n##let\nmuslims\nreveal\nsnake\nimmigrants\nradical\nscreaming\nspeakers\nfiring\nsaving\nbelonging\nease\nlighting\nprefecture\nblame\nfarmer\nhungry\ngrows\nrubbed\nbeam\nsur\nsubsidiary\n##cha\narmenian\nsao\ndropping\nconventional\n##fer\nmicrosoft\nreply\nqualify\nspots\n1867\nsweat\nfestivals\n##ken\nimmigration\nphysician\ndiscover\nexposure\nsandy\nexplanation\nisaac\nimplemented\n##fish\nhart\ninitiated\nconnect\nstakes\npresents\nheights\nhouseholder\npleased\ntourist\nregardless\nslip\nclosest\n##ction\nsurely\nsultan\nbrings\nriley\npreparation\naboard\nslammed\nbaptist\nexperiment\nongoing\ninterstate\norganic\nplayoffs\n##ika\n1877\n130\n##tar\nhindu\nerror\ntours\ntier\nplenty\narrangements\ntalks\ntrapped\nexcited\nsank\nho\nathens\n1872\ndenver\nwelfare\nsuburb\nathletes\ntrick\ndiverse\nbelly\nexclusively\nyelled\n1868\n##med\nconversion\n##ette\n1874\ninternationally\ncomputers\nconductor\nabilities\nsensitive\nhello\ndispute\nmeasured\nglobe\nrocket\nprices\namsterdam\nflights\ntigers\ninn\nmunicipalities\nemotion\nreferences\n3d\n##mus\nexplains\nairlines\nmanufactured\npm\narchaeological\n1873\ninterpretation\ndevon\ncomment\n##ites\nsettlements\nkissing\nabsolute\nimprovement\nsuite\nimpressed\nbarcelona\nsullivan\njefferson\ntowers\njesse\njulie\n##tin\n##lu\ngrandson\nhi\ngauge\nregard\nrings\ninterviews\ntrace\nraymond\nthumb\ndepartments\nburns\nserial\nbulgarian\nscores\ndemonstrated\n##ix\n1866\nkyle\nalberta\nunderneath\nromanized\n##ward\nrelieved\nacquisition\nphrase\ncliff\nreveals\nhan\ncuts\nmerger\ncustom\n##dar\nnee\ngilbert\ngraduation\n##nts\nassessment\ncafe\ndifficulty\ndemands\nswung\ndemocrat\njennifer\ncommons\n1940s\ngrove\n##yo\ncompleting\nfocuses\nsum\nsubstitute\nbearing\nstretch\nreception\n##py\nreflected\nessentially\ndestination\npairs\n##ched\nsurvival\nresource\n##bach\npromoting\ndoubles\nmessages\ntear\n##down\n##
fully\nparade\nflorence\nharvey\nincumbent\npartial\nframework\n900\npedro\nfrozen\nprocedure\nolivia\ncontrols\n##mic\nshelter\npersonally\ntemperatures\n##od\nbrisbane\ntested\nsits\nmarble\ncomprehensive\noxygen\nleonard\n##kov\ninaugural\niranian\nreferring\nquarters\nattitude\n##ivity\nmainstream\nlined\nmars\ndakota\nnorfolk\nunsuccessful\n##°\nexplosion\nhelicopter\ncongressional\n##sing\ninspector\nbitch\nseal\ndeparted\ndivine\n##ters\ncoaching\nexamination\npunishment\nmanufacturer\nsink\ncolumns\nunincorporated\nsignals\nnevada\nsqueezed\ndylan\ndining\nphotos\nmartial\nmanuel\neighteen\nelevator\nbrushed\nplates\nministers\nivy\ncongregation\n##len\nslept\nspecialized\ntaxes\ncurve\nrestricted\nnegotiations\nlikes\nstatistical\narnold\ninspiration\nexecution\nbold\nintermediate\nsignificance\nmargin\nruler\nwheels\ngothic\nintellectual\ndependent\nlistened\neligible\nbuses\nwidow\nsyria\nearn\ncincinnati\ncollapsed\nrecipient\nsecrets\naccessible\nphilippine\nmaritime\ngoddess\nclerk\nsurrender\nbreaks\nplayoff\ndatabase\n##ified\n##lon\nideal\nbeetle\naspect\nsoap\nregulation\nstrings\nexpand\nanglo\nshorter\ncrosses\nretreat\ntough\ncoins\nwallace\ndirections\npressing\n##oon\nshipping\nlocomotives\ncomparison\ntopics\nnephew\n##mes\ndistinction\nhonors\ntravelled\nsierra\nibn\n##over\nfortress\nsa\nrecognised\ncarved\n1869\nclients\n##dan\nintent\n##mar\ncoaches\ndescribing\nbread\n##ington\nbeaten\nnorthwestern\n##ona\nmerit\nyoutube\ncollapse\nchallenges\nem\nhistorians\nobjective\nsubmitted\nvirus\nattacking\ndrake\nassume\n##ere\ndiseases\nmarc\nstem\nleeds\n##cus\n##ab\nfarming\nglasses\n##lock\nvisits\nnowhere\nfellowship\nrelevant\ncarries\nrestaurants\nexperiments\n101\nconstantly\nbases\ntargets\nshah\ntenth\nopponents\nverse\nterritorial\n##ira\nwritings\ncorruption\n##hs\ninstruction\ninherited\nreverse\nemphasis\n##vic\nemployee\narch\nkeeps\nrabbi\nwatson\npayment\nuh\n##ala\nnancy\n##tre\nvenice\nfastest\nsexy\nbanned\nadrian\nproperly\n
ruth\ntouchdown\ndollar\nboards\nmetre\ncircles\nedges\nfavour\ncomments\nok\ntravels\nliberation\nscattered\nfirmly\n##ular\nholland\npermitted\ndiesel\nkenya\nden\noriginated\n##ral\ndemons\nresumed\ndragged\nrider\n##rus\nservant\nblinked\nextend\ntorn\n##ias\n##sey\ninput\nmeal\neverybody\ncylinder\nkinds\ncamps\n##fe\nbullet\nlogic\n##wn\ncroatian\nevolved\nhealthy\nfool\nchocolate\nwise\npreserve\npradesh\n##ess\nrespective\n1850\n##ew\nchicken\nartificial\ngross\ncorresponding\nconvicted\ncage\ncaroline\ndialogue\n##dor\nnarrative\nstranger\nmario\nbr\nchristianity\nfailing\ntrent\ncommanding\nbuddhist\n1848\nmaurice\nfocusing\nyale\nbike\naltitude\n##ering\nmouse\nrevised\n##sley\nveteran\n##ig\npulls\ntheology\ncrashed\ncampaigns\nlegion\n##ability\ndrag\nexcellence\ncustomer\ncancelled\nintensity\nexcuse\n##lar\nliga\nparticipating\ncontributing\nprinting\n##burn\nvariable\n##rk\ncurious\nbin\nlegacy\nrenaissance\n##my\nsymptoms\nbinding\nvocalist\ndancer\n##nie\ngrammar\ngospel\ndemocrats\nya\nenters\nsc\ndiplomatic\nhitler\n##ser\nclouds\nmathematical\nquit\ndefended\noriented\n##heim\nfundamental\nhardware\nimpressive\nequally\nconvince\nconfederate\nguilt\nchuck\nsliding\n##ware\nmagnetic\nnarrowed\npetersburg\nbulgaria\notto\nphd\nskill\n##ama\nreader\nhopes\npitcher\nreservoir\nhearts\nautomatically\nexpecting\nmysterious\nbennett\nextensively\nimagined\nseeds\nmonitor\nfix\n##ative\njournalism\nstruggling\nsignature\nranch\nencounter\nphotographer\nobservation\nprotests\n##pin\ninfluences\n##hr\ncalendar\n##all\ncruz\ncroatia\nlocomotive\nhughes\nnaturally\nshakespeare\nbasement\nhook\nuncredited\nfaded\ntheories\napproaches\ndare\nphillips\nfilling\nfury\nobama\n##ain\nefficient\narc\ndeliver\nmin\nraid\nbreeding\ninducted\nleagues\nefficiency\naxis\nmontana\neagles\n##ked\nsupplied\ninstructions\nkaren\npicking\nindicating\ntrap\nanchor\npractically\nchristians\ntomb\nvary\noccasional\nelectronics\nlords\nreaders\nnewcastle\nfaint\ninnovation\ncol
lect\nsituations\nengagement\n160\nclaude\nmixture\n##feld\npeer\ntissue\nlogo\nlean\n##ration\n°f\nfloors\n##ven\narchitects\nreducing\n##our\n##ments\nrope\n1859\nottawa\n##har\nsamples\nbanking\ndeclaration\nproteins\nresignation\nfrancois\nsaudi\nadvocate\nexhibited\narmor\ntwins\ndivorce\n##ras\nabraham\nreviewed\njo\ntemporarily\nmatrix\nphysically\npulse\ncurled\n##ena\ndifficulties\nbengal\nusage\n##ban\nannie\nriders\ncertificate\n##pi\nholes\nwarsaw\ndistinctive\njessica\n##mon\nmutual\n1857\ncustoms\ncircular\neugene\nremoval\nloaded\nmere\nvulnerable\ndepicted\ngenerations\ndame\nheir\nenormous\nlightly\nclimbing\npitched\nlessons\npilots\nnepal\nram\ngoogle\npreparing\nbrad\nlouise\nrenowned\n##₂\nliam\n##ably\nplaza\nshaw\nsophie\nbrilliant\nbills\n##bar\n##nik\nfucking\nmainland\nserver\npleasant\nseized\nveterans\njerked\nfail\nbeta\nbrush\nradiation\nstored\nwarmth\nsoutheastern\nnate\nsin\nraced\nberkeley\njoke\nathlete\ndesignation\ntrunk\n##low\nroland\nqualification\narchives\nheels\nartwork\nreceives\njudicial\nreserves\n##bed\nwoke\ninstallation\nabu\nfloating\nfake\nlesser\nexcitement\ninterface\nconcentrated\naddressed\ncharacteristic\namanda\nsaxophone\nmonk\nauto\n##bus\nreleasing\negg\ndies\ninteraction\ndefender\nce\noutbreak\nglory\nloving\n##bert\nsequel\nconsciousness\nhttp\nawake\nski\nenrolled\n##ress\nhandling\nrookie\nbrow\nsomebody\nbiography\nwarfare\namounts\ncontracts\npresentation\nfabric\ndissolved\nchallenged\nmeter\npsychological\nlt\nelevated\nrally\naccurate\n##tha\nhospitals\nundergraduate\nspecialist\nvenezuela\nexhibit\nshed\nnursing\nprotestant\nfluid\nstructural\nfootage\njared\nconsistent\nprey\n##ska\nsuccession\nreflect\nexile\nlebanon\nwiped\nsuspect\nshanghai\nresting\nintegration\npreservation\nmarvel\nvariant\npirates\nsheep\nrounded\ncapita\nsailing\ncolonies\nmanuscript\ndeemed\nvariations\nclarke\nfunctional\nemerging\nboxing\nrelaxed\ncurse\nazerbaijan\nheavyweight\nnickname\neditorial\nrang\ngrid\ntighte
ned\nearthquake\nflashed\nmiguel\nrushing\n##ches\nimprovements\nboxes\nbrooks\n180\nconsumption\nmolecular\nfelix\nsocieties\nrepeatedly\nvariation\naids\ncivic\ngraphics\nprofessionals\nrealm\nautonomous\nreceiver\ndelayed\nworkshop\nmilitia\nchairs\ntrump\ncanyon\n##point\nharsh\nextending\nlovely\nhappiness\n##jan\nstake\neyebrows\nembassy\nwellington\nhannah\n##ella\nsony\ncorners\nbishops\nswear\ncloth\ncontents\nxi\nnamely\ncommenced\n1854\nstanford\nnashville\ncourage\ngraphic\ncommitment\ngarrison\n##bin\nhamlet\nclearing\nrebels\nattraction\nliteracy\ncooking\nruins\ntemples\njenny\nhumanity\ncelebrate\nhasn\nfreight\nsixty\nrebel\nbastard\n##art\nnewton\n##ada\ndeer\n##ges\n##ching\nsmiles\ndelaware\nsingers\n##ets\napproaching\nassists\nflame\n##ph\nboulevard\nbarrel\nplanted\n##ome\npursuit\n##sia\nconsequences\nposts\nshallow\ninvitation\nrode\ndepot\nernest\nkane\nrod\nconcepts\npreston\ntopic\nchambers\nstriking\nblast\narrives\ndescendants\nmontgomery\nranges\nworlds\n##lay\n##ari\nspan\nchaos\npraise\n##ag\nfewer\n1855\nsanctuary\nmud\nfbi\n##ions\nprogrammes\nmaintaining\nunity\nharper\nbore\nhandsome\nclosure\ntournaments\nthunder\nnebraska\nlinda\nfacade\nputs\nsatisfied\nargentine\ndale\ncork\ndome\npanama\n##yl\n1858\ntasks\nexperts\n##ates\nfeeding\nequation\n##las\n##ida\n##tu\nengage\nbryan\n##ax\num\nquartet\nmelody\ndisbanded\nsheffield\nblocked\ngasped\ndelay\nkisses\nmaggie\nconnects\n##non\nsts\npoured\ncreator\npublishers\n##we\nguided\nellis\nextinct\nhug\ngaining\n##ord\ncomplicated\n##bility\npoll\nclenched\ninvestigate\n##use\nthereby\nquantum\nspine\ncdp\nhumor\nkills\nadministered\nsemifinals\n##du\nencountered\nignore\n##bu\ncommentary\n##maker\nbother\nroosevelt\n140\nplains\nhalfway\nflowing\ncultures\ncrack\nimprisoned\nneighboring\nairline\n##ses\n##view\n##mate\n##ec\ngather\nwolves\nmarathon\ntransformed\n##ill\ncruise\norganisations\ncarol\npunch\nexhibitions\nnumbered\nalarm\nratings\ndaddy\nsilently\n##stein\nqueens\nc
olours\nimpression\nguidance\nliu\ntactical\n##rat\nmarshal\ndella\narrow\n##ings\nrested\nfeared\ntender\nowns\nbitter\nadvisor\nescort\n##ides\nspare\nfarms\ngrants\n##ene\ndragons\nencourage\ncolleagues\ncameras\n##und\nsucked\npile\nspirits\nprague\nstatements\nsuspension\nlandmark\nfence\ntorture\nrecreation\nbags\npermanently\nsurvivors\npond\nspy\npredecessor\nbombing\ncoup\n##og\nprotecting\ntransformation\nglow\n##lands\n##book\ndug\npriests\nandrea\nfeat\nbarn\njumping\n##chen\n##ologist\n##con\ncasualties\nstern\nauckland\npipe\nserie\nrevealing\nba\n##bel\ntrevor\nmercy\nspectrum\nyang\nconsist\ngoverning\ncollaborated\npossessed\nepic\ncomprises\nblew\nshane\n##ack\nlopez\nhonored\nmagical\nsacrifice\njudgment\nperceived\nhammer\nmtv\nbaronet\ntune\ndas\nmissionary\nsheets\n350\nneutral\noral\nthreatening\nattractive\nshade\naims\nseminary\n##master\nestates\n1856\nmichel\nwounds\nrefugees\nmanufacturers\n##nic\nmercury\nsyndrome\nporter\n##iya\n##din\nhamburg\nidentification\nupstairs\npurse\nwidened\npause\ncared\nbreathed\naffiliate\nsantiago\nprevented\nceltic\nfisher\n125\nrecruited\nbyzantine\nreconstruction\nfarther\n##mp\ndiet\nsake\nau\nspite\nsensation\n##ert\nblank\nseparation\n105\n##hon\nvladimir\narmies\nanime\n##lie\naccommodate\norbit\ncult\nsofia\narchive\n##ify\n##box\nfounders\nsustained\ndisorder\nhonours\nnortheastern\nmia\ncrops\nviolet\nthreats\nblanket\nfires\ncanton\nfollowers\nsouthwestern\nprototype\nvoyage\nassignment\naltered\nmoderate\nprotocol\npistol\n##eo\nquestioned\nbrass\nlifting\n1852\nmath\nauthored\n##ual\ndoug\ndimensional\ndynamic\n##san\n1851\npronounced\ngrateful\nquest\nuncomfortable\nboom\npresidency\nstevens\nrelating\npoliticians\nchen\nbarrier\nquinn\ndiana\nmosque\ntribal\ncheese\npalmer\nportions\nsometime\nchester\ntreasure\nwu\nbend\ndownload\nmillions\nreforms\nregistration\n##osa\nconsequently\nmonitoring\nate\npreliminary\nbrandon\ninvented\nps\neaten\nexterior\nintervention\nports\ndocumented\nlog\
ndisplays\nlecture\nsally\nfavourite\n##itz\nvermont\nlo\ninvisible\nisle\nbreed\n##ator\njournalists\nrelay\nspeaks\nbackward\nexplore\nmidfielder\nactively\nstefan\nprocedures\ncannon\nblond\nkenneth\ncentered\nservants\nchains\nlibraries\nmalcolm\nessex\nhenri\nslavery\n##hal\nfacts\nfairy\ncoached\ncassie\ncats\nwashed\ncop\n##fi\nannouncement\nitem\n2000s\nvinyl\nactivated\nmarco\nfrontier\ngrowled\ncurriculum\n##das\nloyal\naccomplished\nleslie\nritual\nkenny\n##00\nvii\nnapoleon\nhollow\nhybrid\njungle\nstationed\nfriedrich\ncounted\n##ulated\nplatinum\ntheatrical\nseated\ncol\nrubber\nglen\n1840\ndiversity\nhealing\nextends\nid\nprovisions\nadministrator\ncolumbus\n##oe\ntributary\nte\nassured\norg\n##uous\nprestigious\nexamined\nlectures\ngrammy\nronald\nassociations\nbailey\nallan\nessays\nflute\nbelieving\nconsultant\nproceedings\ntravelling\n1853\nkit\nkerala\nyugoslavia\nbuddy\nmethodist\n##ith\nburial\ncentres\nbatman\n##nda\ndiscontinued\nbo\ndock\nstockholm\nlungs\nseverely\n##nk\nciting\nmanga\n##ugh\nsteal\nmumbai\niraqi\nrobot\ncelebrity\nbride\nbroadcasts\nabolished\npot\njoel\noverhead\nfranz\npacked\nreconnaissance\njohann\nacknowledged\nintroduce\nhandled\ndoctorate\ndevelopments\ndrinks\nalley\npalestine\n##nis\n##aki\nproceeded\nrecover\nbradley\ngrain\npatch\nafford\ninfection\nnationalist\nlegendary\n##ath\ninterchange\nvirtually\ngen\ngravity\nexploration\namber\nvital\nwishes\npowell\ndoctrine\nelbow\nscreenplay\n##bird\ncontribute\nindonesian\npet\ncreates\n##com\nenzyme\nkylie\ndiscipline\ndrops\nmanila\nhunger\n##ien\nlayers\nsuffer\nfever\nbits\nmonica\nkeyboard\nmanages\n##hood\nsearched\nappeals\n##bad\ntestament\ngrande\nreid\n##war\nbeliefs\ncongo\n##ification\n##dia\nsi\nrequiring\n##via\ncasey\n1849\nregret\nstreak\nrape\ndepends\nsyrian\nsprint\npound\ntourists\nupcoming\npub\n##xi\ntense\n##els\npracticed\necho\nnationwide\nguild\nmotorcycle\nliz\n##zar\nchiefs\ndesired\nelena\nbye\nprecious\nabsorbed\nrelatives\nbooth\npiani
st\n##mal\ncitizenship\nexhausted\nwilhelm\n##ceae\n##hed\nnoting\nquarterback\nurge\nhectares\n##gue\nace\nholly\n##tal\nblonde\ndavies\nparked\nsustainable\nstepping\ntwentieth\nairfield\ngalaxy\nnest\nchip\n##nell\ntan\nshaft\npaulo\nrequirement\n##zy\nparadise\ntobacco\ntrans\nrenewed\nvietnamese\n##cker\n##ju\nsuggesting\ncatching\nholmes\nenjoying\nmd\ntrips\ncolt\nholder\nbutterfly\nnerve\nreformed\ncherry\nbowling\ntrailer\ncarriage\ngoodbye\nappreciate\ntoy\njoshua\ninteractive\nenabled\ninvolve\n##kan\ncollar\ndetermination\nbunch\nfacebook\nrecall\nshorts\nsuperintendent\nepiscopal\nfrustration\ngiovanni\nnineteenth\nlaser\nprivately\narray\ncirculation\n##ovic\narmstrong\ndeals\npainful\npermit\ndiscrimination\n##wi\naires\nretiring\ncottage\nni\n##sta\nhorizon\nellen\njamaica\nripped\nfernando\nchapters\nplaystation\npatron\nlecturer\nnavigation\nbehaviour\ngenes\ngeorgian\nexport\nsolomon\nrivals\nswift\nseventeen\nrodriguez\nprinceton\nindependently\nsox\n1847\narguing\nentity\ncasting\nhank\ncriteria\noakland\ngeographic\nmilwaukee\nreflection\nexpanding\nconquest\ndubbed\n##tv\nhalt\nbrave\nbrunswick\ndoi\narched\ncurtis\ndivorced\npredominantly\nsomerset\nstreams\nugly\nzoo\nhorrible\ncurved\nbuenos\nfierce\ndictionary\nvector\ntheological\nunions\nhandful\nstability\nchan\npunjab\nsegments\n##lly\naltar\nignoring\ngesture\nmonsters\npastor\n##stone\nthighs\nunexpected\noperators\nabruptly\ncoin\ncompiled\nassociates\nimproving\nmigration\npin\n##ose\ncompact\ncollegiate\nreserved\n##urs\nquarterfinals\nroster\nrestore\nassembled\nhurry\noval\n##cies\n1846\nflags\nmartha\n##del\nvictories\nsharply\n##rated\nargues\ndeadly\nneo\ndrawings\nsymbols\nperformer\n##iel\ngriffin\nrestrictions\nediting\nandrews\njava\njournals\narabia\ncompositions\ndee\npierce\nremoving\nhindi\ncasino\nrunway\ncivilians\nminds\nnasa\nhotels\n##zation\nrefuge\nrent\nretain\npotentially\nconferences\nsuburban\nconducting\n##tto\n##tions\n##tle\ndescended\nmassacre\n##cal\na
mmunition\nterrain\nfork\nsouls\ncounts\nchelsea\ndurham\ndrives\ncab\n##bank\nperth\nrealizing\npalestinian\nfinn\nsimpson\n##dal\nbetty\n##ule\nmoreover\nparticles\ncardinals\ntent\nevaluation\nextraordinary\n##oid\ninscription\n##works\nwednesday\nchloe\nmaintains\npanels\nashley\ntrucks\n##nation\ncluster\nsunlight\nstrikes\nzhang\n##wing\ndialect\ncanon\n##ap\ntucked\n##ws\ncollecting\n##mas\n##can\n##sville\nmaker\nquoted\nevan\nfranco\naria\nbuying\ncleaning\neva\ncloset\nprovision\napollo\nclinic\nrat\n##ez\nnecessarily\nac\n##gle\n##ising\nvenues\nflipped\ncent\nspreading\ntrustees\nchecking\nauthorized\n##sco\ndisappointed\n##ado\nnotion\nduration\ntrumpet\nhesitated\ntopped\nbrussels\nrolls\ntheoretical\nhint\ndefine\naggressive\nrepeat\nwash\npeaceful\noptical\nwidth\nallegedly\nmcdonald\nstrict\ncopyright\n##illa\ninvestors\nmar\njam\nwitnesses\nsounding\nmiranda\nmichelle\nprivacy\nhugo\nharmony\n##pp\nvalid\nlynn\nglared\nnina\n102\nheadquartered\ndiving\nboarding\ngibson\n##ncy\nalbanian\nmarsh\nroutine\ndealt\nenhanced\ner\nintelligent\nsubstance\ntargeted\nenlisted\ndiscovers\nspinning\nobservations\npissed\nsmoking\nrebecca\ncapitol\nvisa\nvaried\ncostume\nseemingly\nindies\ncompensation\nsurgeon\nthursday\narsenal\nwestminster\nsuburbs\nrid\nanglican\n##ridge\nknots\nfoods\nalumni\nlighter\nfraser\nwhoever\nportal\nscandal\n##ray\ngavin\nadvised\ninstructor\nflooding\nterrorist\n##ale\nteenage\ninterim\nsenses\nduck\nteen\nthesis\nabby\neager\novercome\n##ile\nnewport\nglenn\nrises\nshame\n##cc\nprompted\npriority\nforgot\nbomber\nnicolas\nprotective\n360\ncartoon\nkatherine\nbreeze\nlonely\ntrusted\nhenderson\nrichardson\nrelax\nbanner\ncandy\npalms\nremarkable\n##rio\nlegends\ncricketer\nessay\nordained\nedmund\nrifles\ntrigger\n##uri\n##away\nsail\nalert\n1830\naudiences\npenn\nsussex\nsiblings\npursued\nindianapolis\nresist\nrosa\nconsequence\nsucceed\navoided\n1845\n##ulation\ninland\n##tie\n##nna\ncounsel\nprofession\nchronicle\nhurried\n##
una\neyebrow\neventual\nbleeding\ninnovative\ncure\n##dom\ncommittees\naccounting\ncon\nscope\nhardy\nheather\ntenor\ngut\nherald\ncodes\ntore\nscales\nwagon\n##oo\nluxury\ntin\nprefer\nfountain\ntriangle\nbonds\ndarling\nconvoy\ndried\ntraced\nbeings\ntroy\naccidentally\nslam\nfindings\nsmelled\njoey\nlawyers\noutcome\nsteep\nbosnia\nconfiguration\nshifting\ntoll\nbrook\nperformers\nlobby\nphilosophical\nconstruct\nshrine\naggregate\nboot\ncox\nphenomenon\nsavage\ninsane\nsolely\nreynolds\nlifestyle\n##ima\nnationally\nholdings\nconsideration\nenable\nedgar\nmo\nmama\n##tein\nfights\nrelegation\nchances\natomic\nhub\nconjunction\nawkward\nreactions\ncurrency\nfinale\nkumar\nunderwent\nsteering\nelaborate\ngifts\ncomprising\nmelissa\nveins\nreasonable\nsunshine\nchi\nsolve\ntrails\ninhabited\nelimination\nethics\nhuh\nana\nmolly\nconsent\napartments\nlayout\nmarines\n##ces\nhunters\nbulk\n##oma\nhometown\n##wall\n##mont\ncracked\nreads\nneighbouring\nwithdrawn\nadmission\nwingspan\ndamned\nanthology\nlancashire\nbrands\nbatting\nforgive\ncuban\nawful\n##lyn\n104\ndimensions\nimagination\n##ade\ndante\n##ship\ntracking\ndesperately\ngoalkeeper\n##yne\ngroaned\nworkshops\nconfident\nburton\ngerald\nmilton\ncircus\nuncertain\nslope\ncopenhagen\nsophia\nfog\nphilosopher\nportraits\naccent\ncycling\nvarying\ngripped\nlarvae\ngarrett\nspecified\nscotia\nmature\nluther\nkurt\nrap\n##kes\naerial\n750\nferdinand\nheated\nes\ntransported\n##shan\nsafely\nnonetheless\n##orn\n##gal\nmotors\ndemanding\n##sburg\nstartled\n##brook\nally\ngenerate\ncaps\nghana\nstained\ndemo\nmentions\nbeds\nap\nafterward\ndiary\n##bling\nutility\n##iro\nrichards\n1837\nconspiracy\nconscious\nshining\nfootsteps\nobserver\ncyprus\nurged\nloyalty\ndeveloper\nprobability\nolive\nupgraded\ngym\nmiracle\ninsects\ngraves\n1844\nourselves\nhydrogen\namazon\nkatie\ntickets\npoets\n##pm\nplanes\n##pan\nprevention\nwitnessed\ndense\njin\nrandy\ntang\nwarehouse\nmonroe\nbang\narchived\nelderly\ninvestigations
\nalec\ngranite\nmineral\nconflicts\ncontrolling\naboriginal\ncarlo\n##zu\nmechanics\nstan\nstark\nrhode\nskirt\nest\n##berry\nbombs\nrespected\n##horn\nimposed\nlimestone\ndeny\nnominee\nmemphis\ngrabbing\ndisabled\n##als\namusement\naa\nfrankfurt\ncorn\nreferendum\nvaries\nslowed\ndisk\nfirms\nunconscious\nincredible\nclue\nsue\n##zhou\ntwist\n##cio\njoins\nidaho\nchad\ndevelopers\ncomputing\ndestroyer\n103\nmortal\ntucker\nkingston\nchoices\nyu\ncarson\n1800\nos\nwhitney\ngeneva\npretend\ndimension\nstaged\nplateau\nmaya\n##une\nfreestyle\n##bc\nrovers\nhiv\n##ids\ntristan\nclassroom\nprospect\n##hus\nhonestly\ndiploma\nlied\nthermal\nauxiliary\nfeast\nunlikely\niata\n##tel\nmorocco\npounding\ntreasury\nlithuania\nconsiderably\n1841\ndish\n1812\ngeological\nmatching\nstumbled\ndestroying\nmarched\nbrien\nadvances\ncake\nnicole\nbelle\nsettling\nmeasuring\ndirecting\n##mie\ntuesday\nbassist\ncapabilities\nstunned\nfraud\ntorpedo\n##list\n##phone\nanton\nwisdom\nsurveillance\nruined\n##ulate\nlawsuit\nhealthcare\ntheorem\nhalls\ntrend\naka\nhorizontal\ndozens\nacquire\nlasting\nswim\nhawk\ngorgeous\nfees\nvicinity\ndecrease\nadoption\ntactics\n##ography\npakistani\n##ole\ndraws\n##hall\nwillie\nburke\nheath\nalgorithm\nintegral\npowder\nelliott\nbrigadier\njackie\ntate\nvarieties\ndarker\n##cho\nlately\ncigarette\nspecimens\nadds\n##ree\n##ensis\n##inger\nexploded\nfinalist\ncia\nmurders\nwilderness\narguments\nnicknamed\nacceptance\nonwards\nmanufacture\nrobertson\njets\ntampa\nenterprises\nblog\nloudly\ncomposers\nnominations\n1838\nai\nmalta\ninquiry\nautomobile\nhosting\nviii\nrays\ntilted\ngrief\nmuseums\nstrategies\nfurious\neuro\nequality\ncohen\npoison\nsurrey\nwireless\ngoverned\nridiculous\nmoses\n##esh\n##room\nvanished\n##ito\nbarnes\nattract\nmorrison\nistanbul\n##iness\nabsent\nrotation\npetition\njanet\n##logical\nsatisfaction\ncustody\ndeliberately\nobservatory\ncomedian\nsurfaces\npinyin\nnovelist\nstrictly\ncanterbury\noslo\nmonks\nembrace\nibm\nj
ealous\nphotograph\ncontinent\ndorothy\nmarina\ndoc\nexcess\nholden\nallegations\nexplaining\nstack\navoiding\nlance\nstoryline\nmajesty\npoorly\nspike\ndos\nbradford\nraven\ntravis\nclassics\nproven\nvoltage\npillow\nfists\nbutt\n1842\ninterpreted\n##car\n1839\ngage\ntelegraph\nlens\npromising\nexpelled\ncasual\ncollector\nzones\n##min\nsilly\nnintendo\n##kh\n##bra\ndownstairs\nchef\nsuspicious\nafl\nflies\nvacant\nuganda\npregnancy\ncondemned\nlutheran\nestimates\ncheap\ndecree\nsaxon\nproximity\nstripped\nidiot\ndeposits\ncontrary\npresenter\nmagnus\nglacier\nim\noffense\nedwin\n##ori\nupright\n##long\nbolt\n##ois\ntoss\ngeographical\n##izes\nenvironments\ndelicate\nmarking\nabstract\nxavier\nnails\nwindsor\nplantation\noccurring\nequity\nsaskatchewan\nfears\ndrifted\nsequences\nvegetation\nrevolt\n##stic\n1843\nsooner\nfusion\nopposing\nnato\nskating\n1836\nsecretly\nruin\nlease\n##oc\nedit\n##nne\nflora\nanxiety\nruby\n##ological\n##mia\ntel\nbout\ntaxi\nemmy\nfrost\nrainbow\ncompounds\nfoundations\nrainfall\nassassination\nnightmare\ndominican\n##win\nachievements\ndeserve\norlando\nintact\narmenia\n##nte\ncalgary\nvalentine\n106\nmarion\nproclaimed\ntheodore\nbells\ncourtyard\nthigh\ngonzalez\nconsole\ntroop\nminimal\nmonte\neveryday\n##ence\n##if\nsupporter\nterrorism\nbuck\nopenly\npresbyterian\nactivists\ncarpet\n##iers\nrubbing\nuprising\n##yi\ncute\nconceived\nlegally\n##cht\nmillennium\ncello\nvelocity\nji\nrescued\ncardiff\n1835\nrex\nconcentrate\nsenators\nbeard\nrendered\nglowing\nbattalions\nscouts\ncompetitors\nsculptor\ncatalogue\narctic\nion\nraja\nbicycle\nwow\nglancing\nlawn\n##woman\ngentleman\nlighthouse\npublish\npredicted\ncalculated\n##val\nvariants\n##gne\nstrain\n##ui\nwinston\ndeceased\n##nus\ntouchdowns\nbrady\ncaleb\nsinking\nechoed\ncrush\nhon\nblessed\nprotagonist\nhayes\nendangered\nmagnitude\neditors\n##tine\nestimate\nresponsibilities\n##mel\nbackup\nlaying\nconsumed\nsealed\nzurich\nlovers\nfrustrated\n##eau\nahmed\nkicking\nmit
\ntreasurer\n1832\nbiblical\nrefuse\nterrified\npump\nagrees\ngenuine\nimprisonment\nrefuses\nplymouth\n##hen\nlou\n##nen\ntara\ntrembling\nantarctic\nton\nlearns\n##tas\ncrap\ncrucial\nfaction\natop\n##borough\nwrap\nlancaster\nodds\nhopkins\nerik\nlyon\n##eon\nbros\n##ode\nsnap\nlocality\ntips\nempress\ncrowned\ncal\nacclaimed\nchuckled\n##ory\nclara\nsends\nmild\ntowel\n##fl\n##day\n##а\nwishing\nassuming\ninterviewed\n##bal\n##die\ninteractions\neden\ncups\nhelena\n##lf\nindie\nbeck\n##fire\nbatteries\nfilipino\nwizard\nparted\n##lam\ntraces\n##born\nrows\nidol\nalbany\ndelegates\n##ees\n##sar\ndiscussions\n##ex\nnotre\ninstructed\nbelgrade\nhighways\nsuggestion\nlauren\npossess\norientation\nalexandria\nabdul\nbeats\nsalary\nreunion\nludwig\nalright\nwagner\nintimate\npockets\nslovenia\nhugged\nbrighton\nmerchants\ncruel\nstole\ntrek\nslopes\nrepairs\nenrollment\npolitically\nunderlying\npromotional\ncounting\nboeing\n##bb\nisabella\nnaming\n##и\nkeen\nbacteria\nlisting\nseparately\nbelfast\nussr\n450\nlithuanian\nanybody\nribs\nsphere\nmartinez\ncock\nembarrassed\nproposals\nfragments\nnationals\n##fs\n##wski\npremises\nfin\n1500\nalpine\nmatched\nfreely\nbounded\njace\nsleeve\n##af\ngaming\npier\npopulated\nevident\n##like\nfrances\nflooded\n##dle\nfrightened\npour\ntrainer\nframed\nvisitor\nchallenging\npig\nwickets\n##fold\ninfected\nemail\n##pes\narose\n##aw\nreward\necuador\noblast\nvale\nch\nshuttle\n##usa\nbach\nrankings\nforbidden\ncornwall\naccordance\nsalem\nconsumers\nbruno\nfantastic\ntoes\nmachinery\nresolved\njulius\nremembering\npropaganda\niceland\nbombardment\ntide\ncontacts\nwives\n##rah\nconcerto\nmacdonald\nalbania\nimplement\ndaisy\ntapped\nsudan\nhelmet\nangela\nmistress\n##lic\ncrop\nsunk\nfinest\n##craft\nhostile\n##ute\n##tsu\nboxer\nfr\npaths\nadjusted\nhabit\nballot\nsupervision\nsoprano\n##zen\nbullets\nwicked\nsunset\nregiments\ndisappear\nlamp\nperforms\napp\n##gia\n##oa\nrabbit\ndigging\nincidents\nentries\n##cion\ndishes\n##oi\n
introducing\n##ati\n##fied\nfreshman\nslot\njill\ntackles\nbaroque\nbacks\n##iest\nlone\nsponsor\ndestiny\naltogether\nconvert\n##aro\nconsensus\nshapes\ndemonstration\nbasically\nfeminist\nauction\nartifacts\n##bing\nstrongest\ntwitter\nhalifax\n2019\nallmusic\nmighty\nsmallest\nprecise\nalexandra\nviola\n##los\n##ille\nmanuscripts\n##illo\ndancers\nari\nmanagers\nmonuments\nblades\nbarracks\nspringfield\nmaiden\nconsolidated\nelectron\n##end\nberry\nairing\nwheat\nnobel\ninclusion\nblair\npayments\ngeography\nbee\ncc\neleanor\nreact\n##hurst\nafc\nmanitoba\n##yu\nsu\nlineup\nfitness\nrecreational\ninvestments\nairborne\ndisappointment\n##dis\nedmonton\nviewing\n##row\nrenovation\n##cast\ninfant\nbankruptcy\nroses\naftermath\npavilion\n##yer\ncarpenter\nwithdrawal\nladder\n##hy\ndiscussing\npopped\nreliable\nagreements\nrochester\n##abad\ncurves\nbombers\n220\nrao\nreverend\ndecreased\nchoosing\n107\nstiff\nconsulting\nnaples\ncrawford\ntracy\nka\nribbon\ncops\n##lee\ncrushed\ndeciding\nunified\nteenager\naccepting\nflagship\nexplorer\npoles\nsanchez\ninspection\nrevived\nskilled\ninduced\nexchanged\nflee\nlocals\ntragedy\nswallow\nloading\nhanna\ndemonstrate\n##ela\nsalvador\nflown\ncontestants\ncivilization\n##ines\nwanna\nrhodes\nfletcher\nhector\nknocking\nconsiders\n##ough\nnash\nmechanisms\nsensed\nmentally\nwalt\nunclear\n##eus\nrenovated\nmadame\n##cks\ncrews\ngovernmental\n##hin\nundertaken\nmonkey\n##ben\n##ato\nfatal\narmored\ncopa\ncaves\ngovernance\ngrasp\nperception\ncertification\nfroze\ndamp\ntugged\nwyoming\n##rg\n##ero\nnewman\n##lor\nnerves\ncuriosity\ngraph\n115\n##ami\nwithdraw\ntunnels\ndull\nmeredith\nmoss\nexhibits\nneighbors\ncommunicate\naccuracy\nexplored\nraiders\nrepublicans\nsecular\nkat\nsuperman\npenny\ncriticised\n##tch\nfreed\nupdate\nconviction\nwade\nham\nlikewise\ndelegation\ngotta\ndoll\npromises\ntechnological\nmyth\nnationality\nresolve\nconvent\n##mark\nsharon\ndig\nsip\ncoordinator\nentrepreneur\nfold\n##dine\ncapability\nc
ouncillor\nsynonym\nblown\nswan\ncursed\n1815\njonas\nhaired\nsofa\ncanvas\nkeeper\nrivalry\n##hart\nrapper\nspeedway\nswords\npostal\nmaxwell\nestonia\npotter\nrecurring\n##nn\n##ave\nerrors\n##oni\ncognitive\n1834\n##²\nclaws\nnadu\nroberto\nbce\nwrestler\nellie\n##ations\ninfinite\nink\n##tia\npresumably\nfinite\nstaircase\n108\nnoel\npatricia\nnacional\n##cation\nchill\neternal\ntu\npreventing\nprussia\nfossil\nlimbs\n##logist\nernst\nfrog\nperez\nrene\n##ace\npizza\nprussian\n##ios\n##vy\nmolecules\nregulatory\nanswering\nopinions\nsworn\nlengths\nsupposedly\nhypothesis\nupward\nhabitats\nseating\nancestors\ndrank\nyield\nhd\nsynthesis\nresearcher\nmodest\n##var\nmothers\npeered\nvoluntary\nhomeland\n##the\nacclaim\n##igan\nstatic\nvalve\nluxembourg\nalto\ncarroll\nfe\nreceptor\nnorton\nambulance\n##tian\njohnston\ncatholics\ndepicting\njointly\nelephant\ngloria\nmentor\nbadge\nahmad\ndistinguish\nremarked\ncouncils\nprecisely\nallison\nadvancing\ndetection\ncrowded\n##10\ncooperative\nankle\nmercedes\ndagger\nsurrendered\npollution\ncommit\nsubway\njeffrey\nlesson\nsculptures\nprovider\n##fication\nmembrane\ntimothy\nrectangular\nfiscal\nheating\nteammate\nbasket\nparticle\nanonymous\ndeployment\n##ple\nmissiles\ncourthouse\nproportion\nshoe\nsec\n##ller\ncomplaints\nforbes\nblacks\nabandon\nremind\nsizes\noverwhelming\nautobiography\nnatalie\n##awa\nrisks\ncontestant\ncountryside\nbabies\nscorer\ninvaded\nenclosed\nproceed\nhurling\ndisorders\n##cu\nreflecting\ncontinuously\ncruiser\ngraduates\nfreeway\ninvestigated\nore\ndeserved\nmaid\nblocking\nphillip\njorge\nshakes\ndove\nmann\nvariables\nlacked\nburden\naccompanying\nque\nconsistently\norganizing\nprovisional\ncomplained\nendless\n##rm\ntubes\njuice\ngeorges\nkrishna\nmick\nlabels\nthriller\n##uch\nlaps\narcade\nsage\nsnail\n##table\nshannon\nfi\nlaurence\nseoul\nvacation\npresenting\nhire\nchurchill\nsurprisingly\nprohibited\nsavannah\ntechnically\n##oli\n170\n##lessly\ntestimony\nsuited\nspeeds\ntoys\
nromans\nmlb\nflowering\nmeasurement\ntalented\nkay\nsettings\ncharleston\nexpectations\nshattered\nachieving\ntriumph\nceremonies\nportsmouth\nlanes\nmandatory\nloser\nstretching\ncologne\nrealizes\nseventy\ncornell\ncareers\nwebb\n##ulating\namericas\nbudapest\nava\nsuspicion\n##ison\nyo\nconrad\n##hai\nsterling\njessie\nrector\n##az\n1831\ntransform\norganize\nloans\nchristine\nvolcanic\nwarrant\nslender\nsummers\nsubfamily\nnewer\ndanced\ndynamics\nrhine\nproceeds\nheinrich\ngastropod\ncommands\nsings\nfacilitate\neaster\nra\npositioned\nresponses\nexpense\nfruits\nyanked\nimported\n25th\nvelvet\nvic\nprimitive\ntribune\nbaldwin\nneighbourhood\ndonna\nrip\nhay\npr\n##uro\n1814\nespn\nwelcomed\n##aria\nqualifier\nglare\nhighland\ntiming\n##cted\nshells\neased\ngeometry\nlouder\nexciting\nslovakia\n##sion\n##iz\n##lot\nsavings\nprairie\n##ques\nmarching\nrafael\ntonnes\n##lled\ncurtain\npreceding\nshy\nheal\ngreene\nworthy\n##pot\ndetachment\nbury\nsherman\n##eck\nreinforced\nseeks\nbottles\ncontracted\nduchess\noutfit\nwalsh\n##sc\nmickey\n##ase\ngeoffrey\narcher\nsqueeze\ndawson\neliminate\ninvention\n##enberg\nneal\n##eth\nstance\ndealer\ncoral\nmaple\nretire\npolo\nsimplified\n##ht\n1833\nhid\nwatts\nbackwards\njules\n##oke\ngenesis\nmt\nframes\nrebounds\nburma\nwoodland\nmoist\nsantos\nwhispers\ndrained\nsubspecies\n##aa\nstreaming\nulster\nburnt\ncorrespondence\nmaternal\ngerard\ndenis\nstealing\n##load\ngenius\nduchy\n##oria\ninaugurated\nmomentum\nsuits\nplacement\nsovereign\nclause\nthames\n##hara\nconfederation\nreservation\nsketch\nyankees\nlets\nrotten\ncharm\nhal\nverses\nultra\ncommercially\ndot\nsalon\ncitation\nadopt\nwinnipeg\nmist\nallocated\ncairo\n##boy\njenkins\ninterference\nobjectives\n##wind\n1820\nportfolio\narmoured\nsectors\n##eh\ninitiatives\n##world\nintegrity\nexercises\nrobe\ntap\nab\ngazed\n##tones\ndistracted\nrulers\n111\nfavorable\njerome\ntended\ncart\nfactories\n##eri\ndiplomat\nvalued\ngravel\ncharitable\n##try\ncalvin\nexplor
ing\nchang\nshepherd\nterrace\npdf\npupil\n##ural\nreflects\nups\n##rch\ngovernors\nshelf\ndepths\n##nberg\ntrailed\ncrest\ntackle\n##nian\n##ats\nhatred\n##kai\nclare\nmakers\nethiopia\nlongtime\ndetected\nembedded\nlacking\nslapped\nrely\nthomson\nanticipation\niso\nmorton\nsuccessive\nagnes\nscreenwriter\nstraightened\nphilippe\nplaywright\nhaunted\nlicence\niris\nintentions\nsutton\n112\nlogical\ncorrectly\n##weight\nbranded\nlicked\ntipped\nsilva\nricky\nnarrator\nrequests\n##ents\ngreeted\nsupernatural\ncow\n##wald\nlung\nrefusing\nemployer\nstrait\ngaelic\nliner\n##piece\nzoe\nsabha\n##mba\ndriveway\nharvest\nprints\nbates\nreluctantly\nthreshold\nalgebra\nira\nwherever\ncoupled\n240\nassumption\npicks\n##air\ndesigners\nraids\ngentlemen\n##ean\nroller\nblowing\nleipzig\nlocks\nscrew\ndressing\nstrand\n##lings\nscar\ndwarf\ndepicts\n##nu\nnods\n##mine\ndiffer\nboris\n##eur\nyuan\nflip\n##gie\nmob\ninvested\nquestioning\napplying\n##ture\nshout\n##sel\ngameplay\nblamed\nillustrations\nbothered\nweakness\nrehabilitation\n##of\n##zes\nenvelope\nrumors\nminers\nleicester\nsubtle\nkerry\n##ico\nferguson\n##fu\npremiership\nne\n##cat\nbengali\nprof\ncatches\nremnants\ndana\n##rily\nshouting\npresidents\nbaltic\nought\nghosts\ndances\nsailors\nshirley\nfancy\ndominic\n##bie\nmadonna\n##rick\nbark\nbuttons\ngymnasium\nashes\nliver\ntoby\noath\nprovidence\ndoyle\nevangelical\nnixon\ncement\ncarnegie\nembarked\nhatch\nsurroundings\nguarantee\nneeding\npirate\nessence\n##bee\nfilter\ncrane\nhammond\nprojected\nimmune\npercy\ntwelfth\n##ult\nregent\ndoctoral\ndamon\nmikhail\n##ichi\nlu\ncritically\nelect\nrealised\nabortion\nacute\nscreening\nmythology\nsteadily\n##fc\nfrown\nnottingham\nkirk\nwa\nminneapolis\n##rra\nmodule\nalgeria\nmc\nnautical\nencounters\nsurprising\nstatues\navailability\nshirts\npie\nalma\nbrows\nmunster\nmack\nsoup\ncrater\ntornado\nsanskrit\ncedar\nexplosive\nbordered\ndixon\nplanets\nstamp\nexam\nhappily\n##bble\ncarriers\nkidnapped\n##vis\nacco
mmodation\nemigrated\n##met\nknockout\ncorrespondent\nviolation\nprofits\npeaks\nlang\nspecimen\nagenda\nancestry\npottery\nspelling\nequations\nobtaining\nki\nlinking\n1825\ndebris\nasylum\n##20\nbuddhism\nteddy\n##ants\ngazette\n##nger\n##sse\ndental\neligibility\nutc\nfathers\naveraged\nzimbabwe\nfrancesco\ncoloured\nhissed\ntranslator\nlynch\nmandate\nhumanities\nmackenzie\nuniforms\nlin\n##iana\n##gio\nasset\nmhz\nfitting\nsamantha\ngenera\nwei\nrim\nbeloved\nshark\nriot\nentities\nexpressions\nindo\ncarmen\nslipping\nowing\nabbot\nneighbor\nsidney\n##av\nrats\nrecommendations\nencouraging\nsquadrons\nanticipated\ncommanders\nconquered\n##oto\ndonations\ndiagnosed\n##mond\ndivide\n##iva\nguessed\ndecoration\nvernon\nauditorium\nrevelation\nconversations\n##kers\n##power\nherzegovina\ndash\nalike\nprotested\nlateral\nherman\naccredited\nmg\n##gent\nfreeman\nmel\nfiji\ncrow\ncrimson\n##rine\nlivestock\n##pped\nhumanitarian\nbored\noz\nwhip\n##lene\n##ali\nlegitimate\nalter\ngrinning\nspelled\nanxious\noriental\nwesley\n##nin\n##hole\ncarnival\ncontroller\ndetect\n##ssa\nbowed\neducator\nkosovo\nmacedonia\n##sin\noccupy\nmastering\nstephanie\njaneiro\npara\nunaware\nnurses\nnoon\n135\ncam\nhopefully\nranger\ncombine\nsociology\npolar\nrica\n##eer\nneill\n##sman\nholocaust\n##ip\ndoubled\nlust\n1828\n109\ndecent\ncooling\nunveiled\n##card\n1829\nnsw\nhomer\nchapman\nmeyer\n##gin\ndive\nmae\nreagan\nexpertise\n##gled\ndarwin\nbrooke\nsided\nprosecution\ninvestigating\ncomprised\npetroleum\ngenres\nreluctant\ndifferently\ntrilogy\njohns\nvegetables\ncorpse\nhighlighted\nlounge\npension\nunsuccessfully\nelegant\naided\nivory\nbeatles\namelia\ncain\ndubai\nsunny\nimmigrant\nbabe\nclick\n##nder\nunderwater\npepper\ncombining\nmumbled\natlas\nhorns\naccessed\nballad\nphysicians\nhomeless\ngestured\nrpm\nfreak\nlouisville\ncorporations\npatriots\nprizes\nrational\nwarn\nmodes\ndecorative\novernight\ndin\ntroubled\nphantom\n##ort\nmonarch\nsheer\n##dorf\ngenerals\nguidelin
es\norgans\naddresses\n##zon\nenhance\ncurling\nparishes\ncord\n##kie\nlinux\ncaesar\ndeutsche\nbavaria\n##bia\ncoleman\ncyclone\n##eria\nbacon\npetty\n##yama\n##old\nhampton\ndiagnosis\n1824\nthrows\ncomplexity\nrita\ndisputed\n##₃\npablo\n##sch\nmarketed\ntrafficking\n##ulus\nexamine\nplague\nformats\n##oh\nvault\nfaithful\n##bourne\nwebster\n##ox\nhighlights\n##ient\n##ann\nphones\nvacuum\nsandwich\nmodeling\n##gated\nbolivia\nclergy\nqualities\nisabel\n##nas\n##ars\nwears\nscreams\nreunited\nannoyed\nbra\n##ancy\n##rate\ndifferential\ntransmitter\ntattoo\ncontainer\npoker\n##och\nexcessive\nresides\ncowboys\n##tum\naugustus\ntrash\nproviders\nstatute\nretreated\nbalcony\nreversed\nvoid\nstorey\npreceded\nmasses\nleap\nlaughs\nneighborhoods\nwards\nschemes\nfalcon\nsanto\nbattlefield\npad\nronnie\nthread\nlesbian\nvenus\n##dian\nbeg\nsandstone\ndaylight\npunched\ngwen\nanalog\nstroked\nwwe\nacceptable\nmeasurements\ndec\ntoxic\n##kel\nadequate\nsurgical\neconomist\nparameters\nvarsity\n##sberg\nquantity\nella\n##chy\n##rton\ncountess\ngenerating\nprecision\ndiamonds\nexpressway\nga\n##ı\n1821\nuruguay\ntalents\ngalleries\nexpenses\nscanned\ncolleague\noutlets\nryder\nlucien\n##ila\nparamount\n##bon\nsyracuse\ndim\nfangs\ngown\nsweep\n##sie\ntoyota\nmissionaries\nwebsites\n##nsis\nsentences\nadviser\nval\ntrademark\nspells\n##plane\npatience\nstarter\nslim\n##borg\ntoe\nincredibly\nshoots\nelliot\nnobility\n##wyn\ncowboy\nendorsed\ngardner\ntendency\npersuaded\norganisms\nemissions\nkazakhstan\namused\nboring\nchips\nthemed\n##hand\nllc\nconstantinople\nchasing\nsystematic\nguatemala\nborrowed\nerin\ncarey\n##hard\nhighlands\nstruggles\n1810\n##ifying\n##ced\nwong\nexceptions\ndevelops\nenlarged\nkindergarten\ncastro\n##ern\n##rina\nleigh\nzombie\njuvenile\n##most\nconsul\n##nar\nsailor\nhyde\nclarence\nintensive\npinned\nnasty\nuseless\njung\nclayton\nstuffed\nexceptional\nix\napostolic\n230\ntransactions\n##dge\nexempt\nswinging\ncove\nreligions\n##ash\nshields\
ndairy\nbypass\n190\npursuing\nbug\njoyce\nbombay\nchassis\nsouthampton\nchat\ninteract\nredesignated\n##pen\nnascar\npray\nsalmon\nrigid\nregained\nmalaysian\ngrim\npublicity\nconstituted\ncapturing\ntoilet\ndelegate\npurely\ntray\ndrift\nloosely\nstriker\nweakened\ntrinidad\nmitch\nitv\ndefines\ntransmitted\nming\nscarlet\nnodding\nfitzgerald\nfu\nnarrowly\nsp\ntooth\nstandings\nvirtue\n##₁\n##wara\n##cting\nchateau\ngloves\nlid\n##nel\nhurting\nconservatory\n##pel\nsinclair\nreopened\nsympathy\nnigerian\nstrode\nadvocated\noptional\nchronic\ndischarge\n##rc\nsuck\ncompatible\nlaurel\nstella\nshi\nfails\nwage\ndodge\n128\ninformal\nsorts\nlevi\nbuddha\nvillagers\n##aka\nchronicles\nheavier\nsummoned\ngateway\n3000\neleventh\njewelry\ntranslations\naccordingly\nseas\n##ency\nfiber\npyramid\ncubic\ndragging\n##ista\ncaring\n##ops\nandroid\ncontacted\nlunar\n##dt\nkai\nlisbon\npatted\n1826\nsacramento\ntheft\nmadagascar\nsubtropical\ndisputes\nta\nholidays\npiper\nwillow\nmare\ncane\nitunes\nnewfoundland\nbenny\ncompanions\ndong\nraj\nobserve\nroar\ncharming\nplaque\ntibetan\nfossils\nenacted\nmanning\nbubble\ntina\ntanzania\n##eda\n##hir\nfunk\nswamp\ndeputies\ncloak\nufc\nscenario\npar\nscratch\nmetals\nanthem\nguru\nengaging\nspecially\n##boat\ndialects\nnineteen\ncecil\nduet\ndisability\nmessenger\nunofficial\n##lies\ndefunct\neds\nmoonlight\ndrainage\nsurname\npuzzle\nhonda\nswitching\nconservatives\nmammals\nknox\nbroadcaster\nsidewalk\ncope\n##ried\nbenson\nprinces\npeterson\n##sal\nbedford\nsharks\neli\nwreck\nalberto\ngasp\narchaeology\nlgbt\nteaches\nsecurities\nmadness\ncompromise\nwaving\ncoordination\ndavidson\nvisions\nleased\npossibilities\neighty\njun\nfernandez\nenthusiasm\nassassin\nsponsorship\nreviewer\nkingdoms\nestonian\nlaboratories\n##fy\n##nal\napplies\nverb\ncelebrations\n##zzo\nrowing\nlightweight\nsadness\nsubmit\nmvp\nbalanced\ndude\n##vas\nexplicitly\nmetric\nmagnificent\nmound\nbrett\nmohammad\nmistakes\nirregular\n##hing\n##ass\nsander
s\nbetrayed\nshipped\nsurge\n##enburg\nreporters\ntermed\ngeorg\npity\nverbal\nbulls\nabbreviated\nenabling\nappealed\n##are\n##atic\nsicily\nsting\nheel\nsweetheart\nbart\nspacecraft\nbrutal\nmonarchy\n##tter\naberdeen\ncameo\ndiane\n##ub\nsurvivor\nclyde\n##aries\ncomplaint\n##makers\nclarinet\ndelicious\nchilean\nkarnataka\ncoordinates\n1818\npanties\n##rst\npretending\nar\ndramatically\nkiev\nbella\ntends\ndistances\n113\ncatalog\nlaunching\ninstances\ntelecommunications\nportable\nlindsay\nvatican\n##eim\nangles\naliens\nmarker\nstint\nscreens\nbolton\n##rne\njudy\nwool\nbenedict\nplasma\neuropa\nspark\nimaging\nfilmmaker\nswiftly\n##een\ncontributor\n##nor\nopted\nstamps\napologize\nfinancing\nbutter\ngideon\nsophisticated\nalignment\navery\nchemicals\nyearly\nspeculation\nprominence\nprofessionally\n##ils\nimmortal\ninstitutional\ninception\nwrists\nidentifying\ntribunal\nderives\ngains\n##wo\npapal\npreference\nlinguistic\nvince\noperative\nbrewery\n##ont\nunemployment\nboyd\n##ured\n##outs\nalbeit\nprophet\n1813\nbi\n##rr\n##face\n##rad\nquarterly\nasteroid\ncleaned\nradius\ntemper\n##llen\ntelugu\njerk\nviscount\nmenu\n##ote\nglimpse\n##aya\nyacht\nhawaiian\nbaden\n##rl\nlaptop\nreadily\n##gu\nmonetary\noffshore\nscots\nwatches\n##yang\n##arian\nupgrade\nneedle\nxbox\nlea\nencyclopedia\nflank\nfingertips\n##pus\ndelight\nteachings\nconfirm\nroth\nbeaches\nmidway\nwinters\n##iah\nteasing\ndaytime\nbeverly\ngambling\nbonnie\n##backs\nregulated\nclement\nhermann\ntricks\nknot\n##shing\n##uring\n##vre\ndetached\necological\nowed\nspecialty\nbyron\ninventor\nbats\nstays\nscreened\nunesco\nmidland\ntrim\naffection\n##ander\n##rry\njess\nthoroughly\nfeedback\n##uma\nchennai\nstrained\nheartbeat\nwrapping\novertime\npleaded\n##sworth\nmon\nleisure\noclc\n##tate\n##ele\nfeathers\nangelo\nthirds\nnuts\nsurveys\nclever\ngill\ncommentator\n##dos\ndarren\nrides\ngibraltar\n##nc\n##mu\ndissolution\ndedication\nshin\nmeals\nsaddle\nelvis\nreds\nchaired\ntaller\nappreciat
ion\nfunctioning\nniece\nfavored\nadvocacy\nrobbie\ncriminals\nsuffolk\nyugoslav\npassport\nconstable\ncongressman\nhastings\nvera\n##rov\nconsecrated\nsparks\necclesiastical\nconfined\n##ovich\nmuller\nfloyd\nnora\n1822\npaved\n1827\ncumberland\nned\nsaga\nspiral\n##flow\nappreciated\nyi\ncollaborative\ntreating\nsimilarities\nfeminine\nfinishes\n##ib\njade\nimport\n##nse\n##hot\nchampagne\nmice\nsecuring\ncelebrities\nhelsinki\nattributes\n##gos\ncousins\nphases\nache\nlucia\ngandhi\nsubmission\nvicar\nspear\nshine\ntasmania\nbiting\ndetention\nconstitute\ntighter\nseasonal\n##gus\nterrestrial\nmatthews\n##oka\neffectiveness\nparody\nphilharmonic\n##onic\n1816\nstrangers\nencoded\nconsortium\nguaranteed\nregards\nshifts\ntortured\ncollision\nsupervisor\ninform\nbroader\ninsight\ntheaters\narmour\nemeritus\nblink\nincorporates\nmapping\n##50\n##ein\nhandball\nflexible\n##nta\nsubstantially\ngenerous\nthief\n##own\ncarr\nloses\n1793\nprose\nucla\nromeo\ngeneric\nmetallic\nrealization\ndamages\nmk\ncommissioners\nzach\ndefault\n##ther\nhelicopters\nlengthy\nstems\nspa\npartnered\nspectators\nrogue\nindication\npenalties\nteresa\n1801\nsen\n##tric\ndalton\n##wich\nirving\nphotographic\n##vey\ndell\ndeaf\npeters\nexcluded\nunsure\n##vable\npatterson\ncrawled\n##zio\nresided\nwhipped\nlatvia\nslower\necole\npipes\nemployers\nmaharashtra\ncomparable\nva\ntextile\npageant\n##gel\nalphabet\nbinary\nirrigation\nchartered\nchoked\nantoine\noffs\nwaking\nsupplement\n##wen\nquantities\ndemolition\nregain\nlocate\nurdu\nfolks\nalt\n114\n##mc\nscary\nandreas\nwhites\n##ava\nclassrooms\nmw\naesthetic\npublishes\nvalleys\nguides\ncubs\njohannes\nbryant\nconventions\naffecting\n##itt\ndrain\nawesome\nisolation\nprosecutor\nambitious\napology\ncaptive\ndowns\natmospheric\nlorenzo\naisle\nbeef\nfoul\n##onia\nkidding\ncomposite\ndisturbed\nillusion\nnatives\n##ffer\nemi\nrockets\nriverside\nwartime\npainters\nadolf\nmelted\n##ail\nuncertainty\nsimulation\nhawks\nprogressed\nmeantime\n
builder\nspray\nbreach\nunhappy\nregina\nrussians\n##urg\ndetermining\n##tation\ntram\n1806\n##quin\naging\n##12\n1823\ngarion\nrented\nmister\ndiaz\nterminated\nclip\n1817\ndepend\nnervously\ndisco\nowe\ndefenders\nshiva\nnotorious\ndisbelief\nshiny\nworcester\n##gation\n##yr\ntrailing\nundertook\nislander\nbelarus\nlimitations\nwatershed\nfuller\noverlooking\nutilized\nraphael\n1819\nsynthetic\nbreakdown\nklein\n##nate\nmoaned\nmemoir\nlamb\npracticing\n##erly\ncellular\narrows\nexotic\n##graphy\nwitches\n117\ncharted\nrey\nhut\nhierarchy\nsubdivision\nfreshwater\ngiuseppe\naloud\nreyes\nqatar\nmarty\nsideways\nutterly\nsexually\njude\nprayers\nmccarthy\nsoftball\nblend\ndamien\n##gging\n##metric\nwholly\nerupted\nlebanese\nnegro\nrevenues\ntasted\ncomparative\nteamed\ntransaction\nlabeled\nmaori\nsovereignty\nparkway\ntrauma\ngran\nmalay\n121\nadvancement\ndescendant\n2020\nbuzz\nsalvation\ninventory\nsymbolic\n##making\nantarctica\nmps\n##gas\n##bro\nmohammed\nmyanmar\nholt\nsubmarines\ntones\n##lman\nlocker\npatriarch\nbangkok\nemerson\nremarks\npredators\nkin\nafghan\nconfession\nnorwich\nrental\nemerge\nadvantages\n##zel\nrca\n##hold\nshortened\nstorms\naidan\n##matic\nautonomy\ncompliance\n##quet\ndudley\natp\n##osis\n1803\nmotto\ndocumentation\nsummary\nprofessors\nspectacular\nchristina\narchdiocese\nflashing\ninnocence\nremake\n##dell\npsychic\nreef\nscare\nemploy\nrs\nsticks\nmeg\ngus\nleans\n##ude\naccompany\nbergen\ntomas\n##iko\ndoom\nwages\npools\n##nch\n##bes\nbreasts\nscholarly\nalison\noutline\nbrittany\nbreakthrough\nwillis\nrealistic\n##cut\n##boro\ncompetitor\n##stan\npike\npicnic\nicon\ndesigning\ncommercials\nwashing\nvillain\nskiing\nmicro\ncostumes\nauburn\nhalted\nexecutives\n##hat\nlogistics\ncycles\nvowel\napplicable\nbarrett\nexclaimed\neurovision\neternity\nramon\n##umi\n##lls\nmodifications\nsweeping\ndisgust\n##uck\ntorch\naviv\nensuring\nrude\ndusty\nsonic\ndonovan\noutskirts\ncu\npathway\n##band\n##gun\n##lines\ndisciplines\nacids\
ncadet\npaired\n##40\nsketches\n##sive\nmarriages\n##⁺\nfolding\npeers\nslovak\nimplies\nadmired\n##beck\n1880s\nleopold\ninstinct\nattained\nweston\nmegan\nhorace\n##ination\ndorsal\ningredients\nevolutionary\n##its\ncomplications\ndeity\nlethal\nbrushing\nlevy\ndeserted\ninstitutes\nposthumously\ndelivering\ntelescope\ncoronation\nmotivated\nrapids\nluc\nflicked\npays\nvolcano\ntanner\nweighed\n##nica\ncrowds\nfrankie\ngifted\naddressing\ngranddaughter\nwinding\n##rna\nconstantine\ngomez\n##front\nlandscapes\nrudolf\nanthropology\nslate\nwerewolf\n##lio\nastronomy\ncirca\nrouge\ndreaming\nsack\nknelt\ndrowned\nnaomi\nprolific\ntracked\nfreezing\nherb\n##dium\nagony\nrandall\ntwisting\nwendy\ndeposit\ntouches\nvein\nwheeler\n##bbled\n##bor\nbatted\nretaining\ntire\npresently\ncompare\nspecification\ndaemon\nnigel\n##grave\nmerry\nrecommendation\nczechoslovakia\nsandra\nng\nroma\n##sts\nlambert\ninheritance\nsheikh\nwinchester\ncries\nexamining\n##yle\ncomeback\ncuisine\nnave\n##iv\nko\nretrieve\ntomatoes\nbarker\npolished\ndefining\nirene\nlantern\npersonalities\nbegging\ntract\nswore\n1809\n175\n##gic\nomaha\nbrotherhood\n##rley\nhaiti\n##ots\nexeter\n##ete\n##zia\nsteele\ndumb\npearson\n210\nsurveyed\nelisabeth\ntrends\n##ef\nfritz\n##rf\npremium\nbugs\nfraction\ncalmly\nviking\n##birds\ntug\ninserted\nunusually\n##ield\nconfronted\ndistress\ncrashing\nbrent\nturks\nresign\n##olo\ncambodia\ngabe\nsauce\n##kal\nevelyn\n116\nextant\nclusters\nquarry\nteenagers\nluna\n##lers\n##ister\naffiliation\ndrill\n##ashi\npanthers\nscenic\nlibya\nanita\nstrengthen\ninscriptions\n##cated\nlace\nsued\njudith\nriots\n##uted\nmint\n##eta\npreparations\nmidst\ndub\nchallenger\n##vich\nmock\ncf\ndisplaced\nwicket\nbreaths\nenables\nschmidt\nanalyst\n##lum\nag\nhighlight\nautomotive\naxe\njosef\nnewark\nsufficiently\nresembles\n50th\n##pal\nflushed\nmum\ntraits\n##ante\ncommodore\nincomplete\nwarming\ntitular\nceremonial\nethical\n118\ncelebrating\neighteenth\ncao\nlima\nmedalist\nm
obility\nstrips\nsnakes\n##city\nminiature\nzagreb\nbarton\nescapes\numbrella\nautomated\ndoubted\ndiffers\ncooled\ngeorgetown\ndresden\ncooked\nfade\nwyatt\nrna\njacobs\ncarlton\nabundant\nstereo\nboost\nmadras\ninning\n##hia\nspur\nip\nmalayalam\nbegged\nosaka\ngroan\nescaping\ncharging\ndose\nvista\n##aj\nbud\npapa\ncommunists\nadvocates\nedged\ntri\n##cent\nresemble\npeaking\nnecklace\nfried\nmontenegro\nsaxony\ngoose\nglances\nstuttgart\ncurator\nrecruit\ngrocery\nsympathetic\n##tting\n##fort\n127\nlotus\nrandolph\nancestor\n##rand\nsucceeding\njupiter\n1798\nmacedonian\n##heads\nhiking\n1808\nhanding\nfischer\n##itive\ngarbage\nnode\n##pies\nprone\nsingular\npapua\ninclined\nattractions\nitalia\npouring\nmotioned\ngrandma\ngarnered\njacksonville\ncorp\nego\nringing\naluminum\n##hausen\nordering\n##foot\ndrawer\ntraders\nsynagogue\n##play\n##kawa\nresistant\nwandering\nfragile\nfiona\nteased\nvar\nhardcore\nsoaked\njubilee\ndecisive\nexposition\nmercer\nposter\nvalencia\nhale\nkuwait\n1811\n##ises\n##wr\n##eed\ntavern\ngamma\n122\njohan\n##uer\nairways\namino\ngil\n##ury\nvocational\ndomains\ntorres\n##sp\ngenerator\nfolklore\noutcomes\n##keeper\ncanberra\nshooter\nfl\nbeams\nconfrontation\n##lling\n##gram\nfeb\naligned\nforestry\npipeline\njax\nmotorway\nconception\ndecay\n##tos\ncoffin\n##cott\nstalin\n1805\nescorted\nminded\n##nam\nsitcom\npurchasing\ntwilight\nveronica\nadditions\npassive\ntensions\nstraw\n123\nfrequencies\n1804\nrefugee\ncultivation\n##iate\nchristie\nclary\nbulletin\ncrept\ndisposal\n##rich\n##zong\nprocessor\ncrescent\n##rol\nbmw\nemphasized\nwhale\nnazis\naurora\n##eng\ndwelling\nhauled\nsponsors\ntoledo\nmega\nideology\ntheatres\ntessa\ncerambycidae\nsaves\nturtle\ncone\nsuspects\nkara\nrusty\nyelling\ngreeks\nmozart\nshades\ncocked\nparticipant\n##tro\nshire\nspit\nfreeze\nnecessity\n##cos\ninmates\nnielsen\ncouncillors\nloaned\nuncommon\nomar\npeasants\nbotanical\noffspring\ndaniels\nformations\njokes\n1794\npioneers\nsigma\nlicensin
g\n##sus\nwheelchair\npolite\n1807\nliquor\npratt\ntrustee\n##uta\nforewings\nballoon\n##zz\nkilometre\ncamping\nexplicit\ncasually\nshawn\nfoolish\nteammates\nnm\nhassan\ncarrie\njudged\nsatisfy\nvanessa\nknives\nselective\ncnn\nflowed\n##lice\neclipse\nstressed\neliza\nmathematician\ncease\ncultivated\n##roy\ncommissions\nbrowns\n##ania\ndestroyers\nsheridan\nmeadow\n##rius\nminerals\n##cial\ndownstream\nclash\ngram\nmemoirs\nventures\nbaha\nseymour\narchie\nmidlands\nedith\nfare\nflynn\ninvite\ncanceled\ntiles\nstabbed\nboulder\nincorporate\namended\ncamden\nfacial\nmollusk\nunreleased\ndescriptions\nyoga\ngrabs\n550\nraises\nramp\nshiver\n##rose\ncoined\npioneering\ntunes\nqing\nwarwick\ntops\n119\nmelanie\ngiles\n##rous\nwandered\n##inal\nannexed\nnov\n30th\nunnamed\n##ished\norganizational\nairplane\nnormandy\nstoke\nwhistle\nblessing\nviolations\nchased\nholders\nshotgun\n##ctic\noutlet\nreactor\n##vik\ntires\ntearing\nshores\nfortified\nmascot\nconstituencies\nnc\ncolumnist\nproductive\ntibet\n##rta\nlineage\nhooked\noct\ntapes\njudging\ncody\n##gger\nhansen\nkashmir\ntriggered\n##eva\nsolved\ncliffs\n##tree\nresisted\nanatomy\nprotesters\ntransparent\nimplied\n##iga\ninjection\nmattress\nexcluding\n##mbo\ndefenses\nhelpless\ndevotion\n##elli\ngrowl\nliberals\nweber\nphenomena\natoms\nplug\n##iff\nmortality\napprentice\nhowe\nconvincing\naaa\nswimmer\nbarber\nleone\npromptly\nsodium\ndef\nnowadays\narise\n##oning\ngloucester\ncorrected\ndignity\nnorm\nerie\n##ders\nelders\nevacuated\nsylvia\ncompression\n##yar\nhartford\npose\nbackpack\nreasoning\naccepts\n24th\nwipe\nmillimetres\nmarcel\n##oda\ndodgers\nalbion\n1790\noverwhelmed\naerospace\noaks\n1795\nshowcase\nacknowledge\nrecovering\nnolan\nashe\nhurts\ngeology\nfashioned\ndisappearance\nfarewell\nswollen\nshrug\nmarquis\nwimbledon\n124\nrue\n1792\ncommemorate\nreduces\nexperiencing\ninevitable\ncalcutta\nintel\n##court\nmurderer\nsticking\nfisheries\nimagery\nbloom\n280\nbrake\n##inus\ngustav\nhesitatio
n\nmemorable\npo\nviral\nbeans\naccidents\ntunisia\nantenna\nspilled\nconsort\ntreatments\naye\nperimeter\n##gard\ndonation\nhostage\nmigrated\nbanker\naddiction\napex\nlil\ntrout\n##ously\nconscience\n##nova\nrams\nsands\ngenome\npassionate\ntroubles\n##lets\n##set\namid\n##ibility\n##ret\nhiggins\nexceed\nvikings\n##vie\npayne\n##zan\nmuscular\n##ste\ndefendant\nsucking\n##wal\nibrahim\nfuselage\nclaudia\nvfl\neuropeans\nsnails\ninterval\n##garh\npreparatory\nstatewide\ntasked\nlacrosse\nviktor\n##lation\nangola\n##hra\nflint\nimplications\nemploys\nteens\npatrons\nstall\nweekends\nbarriers\nscrambled\nnucleus\ntehran\njenna\nparsons\nlifelong\nrobots\ndisplacement\n5000\n##bles\nprecipitation\n##gt\nknuckles\nclutched\n1802\nmarrying\necology\nmarx\naccusations\ndeclare\nscars\nkolkata\nmat\nmeadows\nbermuda\nskeleton\nfinalists\nvintage\ncrawl\ncoordinate\naffects\nsubjected\norchestral\nmistaken\n##tc\nmirrors\ndipped\nrelied\n260\narches\ncandle\n##nick\nincorporating\nwildly\nfond\nbasilica\nowl\nfringe\nrituals\nwhispering\nstirred\nfeud\ntertiary\nslick\ngoat\nhonorable\nwhereby\nskip\nricardo\nstripes\nparachute\nadjoining\nsubmerged\nsynthesizer\n##gren\nintend\npositively\nninety\nphi\nbeaver\npartition\nfellows\nalexis\nprohibition\ncarlisle\nbizarre\nfraternity\n##bre\ndoubts\nicy\ncbc\naquatic\nsneak\nsonny\ncombines\nairports\ncrude\nsupervised\nspatial\nmerge\nalfonso\n##bic\ncorrupt\nscan\nundergo\n##ams\ndisabilities\ncolombian\ncomparing\ndolphins\nperkins\n##lish\nreprinted\nunanimous\nbounced\nhairs\nunderworld\nmidwest\nsemester\nbucket\npaperback\nminiseries\ncoventry\ndemise\n##leigh\ndemonstrations\nsensor\nrotating\nyan\n##hler\narrange\nsoils\n##idge\nhyderabad\nlabs\n##dr\nbrakes\ngrandchildren\n##nde\nnegotiated\nrover\nferrari\ncontinuation\ndirectorate\naugusta\nstevenson\ncounterpart\ngore\n##rda\nnursery\nrican\nave\ncollectively\nbroadly\npastoral\nrepertoire\nasserted\ndiscovering\nnordic\nstyled\nfiba\ncunningham\nharley\nmiddles
ex\nsurvives\ntumor\ntempo\nzack\naiming\nlok\nurgent\n##rade\n##nto\ndevils\n##ement\ncontractor\nturin\n##wl\n##ool\nbliss\nrepaired\nsimmons\nmoan\nastronomical\ncr\nnegotiate\nlyric\n1890s\nlara\nbred\nclad\nangus\npbs\n##ience\nengineered\nposed\n##lk\nhernandez\npossessions\nelbows\npsychiatric\nstrokes\nconfluence\nelectorate\nlifts\ncampuses\nlava\nalps\n##ep\n##ution\n##date\nphysicist\nwoody\n##page\n##ographic\n##itis\njuliet\nreformation\nsparhawk\n320\ncomplement\nsuppressed\njewel\n##½\nfloated\n##kas\ncontinuity\nsadly\n##ische\ninability\nmelting\nscanning\npaula\nflour\njudaism\nsafer\nvague\n##lm\nsolving\ncurb\n##stown\nfinancially\ngable\nbees\nexpired\nmiserable\ncassidy\ndominion\n1789\ncupped\n145\nrobbery\nfacto\namos\nwarden\nresume\ntallest\nmarvin\ning\npounded\nusd\ndeclaring\ngasoline\n##aux\ndarkened\n270\n650\nsophomore\n##mere\nerection\ngossip\ntelevised\nrisen\ndial\n##eu\npillars\n##link\npassages\nprofound\n##tina\narabian\nashton\nsilicon\nnail\n##ead\n##lated\n##wer\n##hardt\nfleming\nfirearms\nducked\ncircuits\nblows\nwaterloo\ntitans\n##lina\natom\nfireplace\ncheshire\nfinanced\nactivation\nalgorithms\n##zzi\nconstituent\ncatcher\ncherokee\npartnerships\nsexuality\nplatoon\ntragic\nvivian\nguarded\nwhiskey\nmeditation\npoetic\n##late\n##nga\n##ake\nporto\nlisteners\ndominance\nkendra\nmona\nchandler\nfactions\n22nd\nsalisbury\nattitudes\nderivative\n##ido\n##haus\nintake\npaced\njavier\nillustrator\nbarrels\nbias\ncockpit\nburnett\ndreamed\nensuing\n##anda\nreceptors\nsomeday\nhawkins\nmattered\n##lal\nslavic\n1799\njesuit\ncameroon\nwasted\ntai\nwax\nlowering\nvictorious\nfreaking\noutright\nhancock\nlibrarian\nsensing\nbald\ncalcium\nmyers\ntablet\nannouncing\nbarack\nshipyard\npharmaceutical\n##uan\ngreenwich\nflush\nmedley\npatches\nwolfgang\npt\nspeeches\nacquiring\nexams\nnikolai\n##gg\nhayden\nkannada\n##type\nreilly\n##pt\nwaitress\nabdomen\ndevastated\ncapped\npseudonym\npharmacy\nfulfill\nparaguay\n1796\nclicked\n##t
rom\narchipelago\nsyndicated\n##hman\nlumber\norgasm\nrejection\nclifford\nlorraine\nadvent\nmafia\nrodney\nbrock\n##ght\n##used\n##elia\ncassette\nchamberlain\ndespair\nmongolia\nsensors\ndevelopmental\nupstream\n##eg\n##alis\nspanning\n165\ntrombone\nbasque\nseeded\ninterred\nrenewable\nrhys\nleapt\nrevision\nmolecule\n##ages\nchord\nvicious\nnord\nshivered\n23rd\narlington\ndebts\ncorpus\nsunrise\nbays\nblackburn\ncentimetres\n##uded\nshuddered\ngm\nstrangely\ngripping\ncartoons\nisabelle\norbital\n##ppa\nseals\nproving\n##lton\nrefusal\nstrengthened\nbust\nassisting\nbaghdad\nbatsman\nportrayal\nmara\npushes\nspears\nog\n##cock\nreside\nnathaniel\nbrennan\n1776\nconfirmation\ncaucus\n##worthy\nmarkings\nyemen\nnobles\nku\nlazy\nviewer\ncatalan\nencompasses\nsawyer\n##fall\nsparked\nsubstances\npatents\nbraves\narranger\nevacuation\nsergio\npersuade\ndover\ntolerance\npenguin\ncum\njockey\ninsufficient\ntownships\noccupying\ndeclining\nplural\nprocessed\nprojection\npuppet\nflanders\nintroduces\nliability\n##yon\ngymnastics\nantwerp\ntaipei\nhobart\ncandles\njeep\nwes\nobservers\n126\nchaplain\nbundle\nglorious\n##hine\nhazel\nflung\nsol\nexcavations\ndumped\nstares\nsh\nbangalore\ntriangular\nicelandic\nintervals\nexpressing\nturbine\n##vers\nsongwriting\ncrafts\n##igo\njasmine\nditch\nrite\n##ways\nentertaining\ncomply\nsorrow\nwrestlers\nbasel\nemirates\nmarian\nrivera\nhelpful\n##some\ncaution\ndownward\nnetworking\n##atory\n##tered\ndarted\ngenocide\nemergence\nreplies\nspecializing\nspokesman\nconvenient\nunlocked\nfading\naugustine\nconcentrations\nresemblance\nelijah\ninvestigator\nandhra\n##uda\npromotes\nbean\n##rrell\nfleeing\nwan\nsimone\nannouncer\n##ame\n##bby\nlydia\nweaver\n132\nresidency\nmodification\n##fest\nstretches\n##ast\nalternatively\nnat\nlowe\nlacks\n##ented\npam\ntile\nconcealed\ninferior\nabdullah\nresidences\ntissues\nvengeance\n##ided\nmoisture\npeculiar\ngroove\nzip\nbologna\njennings\nninja\noversaw\nzombies\npumping\nbatch\nlivin
gston\nemerald\ninstallations\n1797\npeel\nnitrogen\nrama\n##fying\n##star\nschooling\nstrands\nresponding\nwerner\n##ost\nlime\ncasa\naccurately\ntargeting\n##rod\nunderway\n##uru\nhemisphere\nlester\n##yard\noccupies\n2d\ngriffith\nangrily\nreorganized\n##owing\ncourtney\ndeposited\n##dd\n##30\nestadio\n##ifies\ndunn\nexiled\n##ying\nchecks\n##combe\n##о\n##fly\nsuccesses\nunexpectedly\nblu\nassessed\n##flower\n##ه\nobserving\nsacked\nspiders\nkn\n##tail\nmu\nnodes\nprosperity\naudrey\ndivisional\n155\nbroncos\ntangled\nadjust\nfeeds\nerosion\npaolo\nsurf\ndirectory\nsnatched\nhumid\nadmiralty\nscrewed\ngt\nreddish\n##nese\nmodules\ntrench\nlamps\nbind\nleah\nbucks\ncompetes\n##nz\n##form\ntranscription\n##uc\nisles\nviolently\nclutching\npga\ncyclist\ninflation\nflats\nragged\nunnecessary\n##hian\nstubborn\ncoordinated\nharriet\nbaba\ndisqualified\n330\ninsect\nwolfe\n##fies\nreinforcements\nrocked\nduel\nwinked\nembraced\nbricks\n##raj\nhiatus\ndefeats\npending\nbrightly\njealousy\n##xton\n##hm\n##uki\nlena\ngdp\ncolorful\n##dley\nstein\nkidney\n##shu\nunderwear\nwanderers\n##haw\n##icus\nguardians\nm³\nroared\nhabits\n##wise\npermits\ngp\nuranium\npunished\ndisguise\nbundesliga\nelise\ndundee\nerotic\npartisan\npi\ncollectors\nfloat\nindividually\nrendering\nbehavioral\nbucharest\nser\nhare\nvalerie\ncorporal\nnutrition\nproportional\n##isa\nimmense\n##kis\npavement\n##zie\n##eld\nsutherland\ncrouched\n1775\n##lp\nsuzuki\ntrades\nendurance\noperas\ncrosby\nprayed\npriory\nrory\nsocially\n##urn\ngujarat\n##pu\nwalton\ncube\npasha\nprivilege\nlennon\nfloods\nthorne\nwaterfall\nnipple\nscouting\napprove\n##lov\nminorities\nvoter\ndwight\nextensions\nassure\nballroom\nslap\ndripping\nprivileges\nrejoined\nconfessed\ndemonstrating\npatriotic\nyell\ninvestor\n##uth\npagan\nslumped\nsquares\n##cle\n##kins\nconfront\nbert\nembarrassment\n##aid\naston\nurging\nsweater\nstarr\nyuri\nbrains\nwilliamson\ncommuter\nmortar\nstructured\nselfish\nexports\n##jon\ncds\n##him\nun
finished\n##rre\nmortgage\ndestinations\n##nagar\ncanoe\nsolitary\nbuchanan\ndelays\nmagistrate\nfk\n##pling\nmotivation\n##lier\n##vier\nrecruiting\nassess\n##mouth\nmalik\nantique\n1791\npius\nrahman\nreich\ntub\nzhou\nsmashed\nairs\ngalway\nxii\nconditioning\nhonduras\ndischarged\ndexter\n##pf\nlionel\n129\ndebates\nlemon\ntiffany\nvolunteered\ndom\ndioxide\nprocession\ndevi\nsic\ntremendous\nadvertisements\ncolts\ntransferring\nverdict\nhanover\ndecommissioned\nutter\nrelate\npac\nracism\n##top\nbeacon\nlimp\nsimilarity\nterra\noccurrence\nant\n##how\nbecky\ncapt\nupdates\narmament\nrichie\npal\n##graph\nhalloween\nmayo\n##ssen\n##bone\ncara\nserena\nfcc\ndolls\nobligations\n##dling\nviolated\nlafayette\njakarta\nexploitation\n##ime\ninfamous\niconic\n##lah\n##park\nkitty\nmoody\nreginald\ndread\nspill\ncrystals\nolivier\nmodeled\nbluff\nequilibrium\nseparating\nnotices\nordnance\nextinction\nonset\ncosmic\nattachment\nsammy\nexpose\nprivy\nanchored\n##bil\nabbott\nadmits\nbending\nbaritone\nemmanuel\npoliceman\nvaughan\nwinged\nclimax\ndresses\ndenny\npolytechnic\nmohamed\nburmese\nauthentic\nnikki\ngenetics\ngrandparents\nhomestead\ngaza\npostponed\nmetacritic\nuna\n##sby\n##bat\nunstable\ndissertation\n##rial\n##cian\ncurls\nobscure\nuncovered\nbronx\npraying\ndisappearing\n##hoe\nprehistoric\ncoke\nturret\nmutations\nnonprofit\npits\nmonaco\n##ي\n##usion\nprominently\ndispatched\npodium\n##mir\nuci\n##uation\n133\nfortifications\nbirthplace\nkendall\n##lby\n##oll\npreacher\nrack\ngoodman\n##rman\npersistent\n##ott\ncountless\njaime\nrecorder\nlexington\npersecution\njumps\nrenewal\nwagons\n##11\ncrushing\n##holder\ndecorations\n##lake\nabundance\nwrath\nlaundry\n£1\ngarde\n##rp\njeanne\nbeetles\npeasant\n##sl\nsplitting\ncaste\nsergei\n##rer\n##ema\nscripts\n##ively\nrub\nsatellites\n##vor\ninscribed\nverlag\nscrapped\ngale\npackages\nchick\npotato\nslogan\nkathleen\narabs\n##culture\ncounterparts\nreminiscent\nchoral\n##tead\nrand\nretains\nbushes\ndane\nac
complish\ncourtesy\ncloses\n##oth\nslaughter\nhague\nkrakow\nlawson\ntailed\nelias\nginger\n##ttes\ncanopy\nbetrayal\nrebuilding\nturf\n##hof\nfrowning\nallegiance\nbrigades\nkicks\nrebuild\npolls\nalias\nnationalism\ntd\nrowan\naudition\nbowie\nfortunately\nrecognizes\nharp\ndillon\nhorrified\n##oro\nrenault\n##tics\nropes\n##α\npresumed\nrewarded\ninfrared\nwiping\naccelerated\nillustration\n##rid\npresses\npractitioners\nbadminton\n##iard\ndetained\n##tera\nrecognizing\nrelates\nmisery\n##sies\n##tly\nreproduction\npiercing\npotatoes\nthornton\nesther\nmanners\nhbo\n##aan\nours\nbullshit\nernie\nperennial\nsensitivity\nilluminated\nrupert\n##jin\n##iss\n##ear\nrfc\nnassau\n##dock\nstaggered\nsocialism\n##haven\nappointments\nnonsense\nprestige\nsharma\nhaul\n##tical\nsolidarity\ngps\n##ook\n##rata\nigor\npedestrian\n##uit\nbaxter\ntenants\nwires\nmedication\nunlimited\nguiding\nimpacts\ndiabetes\n##rama\nsasha\npas\nclive\nextraction\n131\ncontinually\nconstraints\n##bilities\nsonata\nhunted\nsixteenth\nchu\nplanting\nquote\nmayer\npretended\nabs\nspat\n##hua\nceramic\n##cci\ncurtains\npigs\npitching\n##dad\nlatvian\nsore\ndayton\n##sted\n##qi\npatrols\nslice\nplayground\n##nted\nshone\nstool\napparatus\ninadequate\nmates\ntreason\n##ija\ndesires\n##liga\n##croft\nsomalia\nlaurent\nmir\nleonardo\noracle\ngrape\nobliged\nchevrolet\nthirteenth\nstunning\nenthusiastic\n##ede\naccounted\nconcludes\ncurrents\nbasil\n##kovic\ndrought\n##rica\nmai\n##aire\nshove\nposting\n##shed\npilgrimage\nhumorous\npacking\nfry\npencil\nwines\nsmells\n144\nmarilyn\naching\nnewest\nclung\nbon\nneighbours\nsanctioned\n##pie\nmug\n##stock\ndrowning\n##mma\nhydraulic\n##vil\nhiring\nreminder\nlilly\ninvestigators\n##ncies\nsour\n##eous\ncompulsory\npacket\n##rion\n##graphic\n##elle\ncannes\n##inate\ndepressed\n##rit\nheroic\nimportantly\ntheresa\n##tled\nconway\nsaturn\nmarginal\nrae\n##xia\ncorresponds\nroyce\npact\njasper\nexplosives\npackaging\naluminium\n##ttered\ndenotes\nrhythmic\n
spans\nassignments\nhereditary\noutlined\noriginating\nsundays\nlad\nreissued\ngreeting\nbeatrice\n##dic\npillar\nmarcos\nplots\nhandbook\nalcoholic\njudiciary\navant\nslides\nextract\nmasculine\nblur\n##eum\n##force\nhomage\ntrembled\nowens\nhymn\ntrey\nomega\nsignaling\nsocks\naccumulated\nreacted\nattic\ntheo\nlining\nangie\ndistraction\nprimera\ntalbot\n##key\n1200\nti\ncreativity\nbilled\n##hey\ndeacon\neduardo\nidentifies\nproposition\ndizzy\ngunner\nhogan\n##yam\n##pping\n##hol\nja\n##chan\njensen\nreconstructed\n##berger\nclearance\ndarius\n##nier\nabe\nharlem\nplea\ndei\ncircled\nemotionally\nnotation\nfascist\nneville\nexceeded\nupwards\nviable\nducks\n##fo\nworkforce\nracer\nlimiting\nshri\n##lson\npossesses\n1600\nkerr\nmoths\ndevastating\nladen\ndisturbing\nlocking\n##cture\ngal\nfearing\naccreditation\nflavor\naide\n1870s\nmountainous\n##baum\nmelt\n##ures\nmotel\ntexture\nservers\nsoda\n##mb\nherd\n##nium\nerect\npuzzled\nhum\npeggy\nexaminations\ngould\ntestified\ngeoff\nren\ndevised\nsacks\n##law\ndenial\nposters\ngrunted\ncesar\ntutor\nec\ngerry\nofferings\nbyrne\nfalcons\ncombinations\nct\nincoming\npardon\nrocking\n26th\navengers\nflared\nmankind\nseller\nuttar\nloch\nnadia\nstroking\nexposing\n##hd\nfertile\nancestral\ninstituted\n##has\nnoises\nprophecy\ntaxation\neminent\nvivid\npol\n##bol\ndart\nindirect\nmultimedia\nnotebook\nupside\ndisplaying\nadrenaline\nreferenced\ngeometric\n##iving\nprogression\n##ddy\nblunt\nannounce\n##far\nimplementing\n##lav\naggression\nliaison\ncooler\ncares\nheadache\nplantations\ngorge\ndots\nimpulse\nthickness\nashamed\naveraging\nkathy\nobligation\nprecursor\n137\nfowler\nsymmetry\nthee\n225\nhears\n##rai\nundergoing\nads\nbutcher\nbowler\n##lip\ncigarettes\nsubscription\ngoodness\n##ically\nbrowne\n##hos\n##tech\nkyoto\ndonor\n##erty\ndamaging\nfriction\ndrifting\nexpeditions\nhardened\nprostitution\n152\nfauna\nblankets\nclaw\ntossing\nsnarled\nbutterflies\nrecruits\ninvestigative\ncoated\nhealed\n138\ncomm
unal\nhai\nxiii\nacademics\nboone\npsychologist\nrestless\nlahore\nstephens\nmba\nbrendan\nforeigners\nprinter\n##pc\nached\nexplode\n27th\ndeed\nscratched\ndared\n##pole\ncardiac\n1780\nokinawa\nproto\ncommando\ncompelled\noddly\nelectrons\n##base\nreplica\nthanksgiving\n##rist\nsheila\ndeliberate\nstafford\ntidal\nrepresentations\nhercules\nou\n##path\n##iated\nkidnapping\nlenses\n##tling\ndeficit\nsamoa\nmouths\nconsuming\ncomputational\nmaze\ngranting\nsmirk\nrazor\nfixture\nideals\ninviting\naiden\nnominal\n##vs\nissuing\njulio\npitt\nramsey\ndocks\n##oss\nexhaust\n##owed\nbavarian\ndraped\nanterior\nmating\nethiopian\nexplores\nnoticing\n##nton\ndiscarded\nconvenience\nhoffman\nendowment\nbeasts\ncartridge\nmormon\npaternal\nprobe\nsleeves\ninterfere\nlump\ndeadline\n##rail\njenks\nbulldogs\nscrap\nalternating\njustified\nreproductive\nnam\nseize\ndescending\nsecretariat\nkirby\ncoupe\ngrouped\nsmash\npanther\nsedan\ntapping\n##18\nlola\ncheer\ngermanic\nunfortunate\n##eter\nunrelated\n##fan\nsubordinate\n##sdale\nsuzanne\nadvertisement\n##ility\nhorsepower\n##lda\ncautiously\ndiscourse\nluigi\n##mans\n##fields\nnoun\nprevalent\nmao\nschneider\neverett\nsurround\ngovernorate\nkira\n##avia\nwestward\n##take\nmisty\nrails\nsustainability\n134\nunused\n##rating\npacks\ntoast\nunwilling\nregulate\nthy\nsuffrage\nnile\nawe\nassam\ndefinitions\ntravelers\naffordable\n##rb\nconferred\nsells\nundefeated\nbeneficial\ntorso\nbasal\nrepeating\nremixes\n##pass\nbahrain\ncables\nfang\n##itated\nexcavated\nnumbering\nstatutory\n##rey\ndeluxe\n##lian\nforested\nramirez\nderbyshire\nzeus\nslamming\ntransfers\nastronomer\nbanana\nlottery\nberg\nhistories\nbamboo\n##uchi\nresurrection\nposterior\nbowls\nvaguely\n##thi\nthou\npreserving\ntensed\noffence\n##inas\nmeyrick\ncallum\nridden\nwatt\nlangdon\ntying\nlowland\nsnorted\ndaring\ntruman\n##hale\n##girl\naura\noverly\nfiling\nweighing\ngoa\ninfections\nphilanthropist\nsaunders\neponymous\n##owski\nlatitude\nperspectives\nrevi
ewing\nmets\ncommandant\nradial\n##kha\nflashlight\nreliability\nkoch\nvowels\namazed\nada\nelaine\nsupper\n##rth\n##encies\npredator\ndebated\nsoviets\ncola\n##boards\n##nah\ncompartment\ncrooked\narbitrary\nfourteenth\n##ctive\nhavana\nmajors\nsteelers\nclips\nprofitable\nambush\nexited\npackers\n##tile\nnude\ncracks\nfungi\n##е\nlimb\ntrousers\njosie\nshelby\ntens\nfrederic\n##ος\ndefinite\nsmoothly\nconstellation\ninsult\nbaton\ndiscs\nlingering\n##nco\nconclusions\nlent\nstaging\nbecker\ngrandpa\nshaky\n##tron\neinstein\nobstacles\nsk\nadverse\nelle\neconomically\n##moto\nmccartney\nthor\ndismissal\nmotions\nreadings\nnostrils\ntreatise\n##pace\nsqueezing\nevidently\nprolonged\n1783\nvenezuelan\nje\nmarguerite\nbeirut\ntakeover\nshareholders\n##vent\ndenise\ndigit\nairplay\nnorse\n##bbling\nimaginary\npills\nhubert\nblaze\nvacated\neliminating\n##ello\nvine\nmansfield\n##tty\nretrospective\nbarrow\nborne\nclutch\nbail\nforensic\nweaving\n##nett\n##witz\ndesktop\ncitadel\npromotions\nworrying\ndorset\nieee\nsubdivided\n##iating\nmanned\nexpeditionary\npickup\nsynod\nchuckle\n185\nbarney\n##rz\n##ffin\nfunctionality\nkarachi\nlitigation\nmeanings\nuc\nlick\nturbo\nanders\n##ffed\nexecute\ncurl\noppose\nankles\ntyphoon\n##د\n##ache\n##asia\nlinguistics\ncompassion\npressures\ngrazing\nperfection\n##iting\nimmunity\nmonopoly\nmuddy\nbackgrounds\n136\nnamibia\nfrancesca\nmonitors\nattracting\nstunt\ntuition\n##ии\nvegetable\n##mates\n##quent\nmgm\njen\ncomplexes\nforts\n##ond\ncellar\nbites\nseventeenth\nroyals\nflemish\nfailures\nmast\ncharities\n##cular\nperuvian\ncapitals\nmacmillan\nipswich\noutward\nfrigate\npostgraduate\nfolds\nemploying\n##ouse\nconcurrently\nfiery\n##tai\ncontingent\nnightmares\nmonumental\nnicaragua\n##kowski\nlizard\nmal\nfielding\ngig\nreject\n##pad\nharding\n##ipe\ncoastline\n##cin\n##nos\nbeethoven\nhumphrey\ninnovations\n##tam\n##nge\nnorris\ndoris\nsolicitor\nhuang\nobey\n141\n##lc\nniagara\n##tton\nshelves\naug\nbourbon\ncurry\nnight
club\nspecifications\nhilton\n##ndo\ncentennial\ndispersed\nworm\nneglected\nbriggs\nsm\nfont\nkuala\nuneasy\nplc\n##nstein\n##bound\n##aking\n##burgh\nawaiting\npronunciation\n##bbed\n##quest\neh\noptimal\nzhu\nraped\ngreens\npresided\nbrenda\nworries\n##life\nvenetian\nmarxist\nturnout\n##lius\nrefined\nbraced\nsins\ngrasped\nsunderland\nnickel\nspeculated\nlowell\ncyrillic\ncommunism\nfundraising\nresembling\ncolonists\nmutant\nfreddie\nusc\n##mos\ngratitude\n##run\nmural\n##lous\nchemist\nwi\nreminds\n28th\nsteals\ntess\npietro\n##ingen\npromoter\nri\nmicrophone\nhonoured\nrai\nsant\n##qui\nfeather\n##nson\nburlington\nkurdish\nterrorists\ndeborah\nsickness\n##wed\n##eet\nhazard\nirritated\ndesperation\nveil\nclarity\n##rik\njewels\nxv\n##gged\n##ows\n##cup\nberkshire\nunfair\nmysteries\norchid\nwinced\nexhaustion\nrenovations\nstranded\nobe\ninfinity\n##nies\nadapt\nredevelopment\nthanked\nregistry\nolga\ndomingo\nnoir\ntudor\nole\n##atus\ncommenting\nbehaviors\n##ais\ncrisp\npauline\nprobable\nstirling\nwigan\n##bian\nparalympics\npanting\nsurpassed\n##rew\nluca\nbarred\npony\nfamed\n##sters\ncassandra\nwaiter\ncarolyn\nexported\n##orted\nandres\ndestructive\ndeeds\njonah\ncastles\nvacancy\nsuv\n##glass\n1788\norchard\nyep\nfamine\nbelarusian\nsprang\n##forth\nskinny\n##mis\nadministrators\nrotterdam\nzambia\nzhao\nboiler\ndiscoveries\n##ride\n##physics\nlucius\ndisappointing\noutreach\nspoon\n##frame\nqualifications\nunanimously\nenjoys\nregency\n##iidae\nstade\nrealism\nveterinary\nrodgers\ndump\nalain\nchestnut\ncastile\ncensorship\nrumble\ngibbs\n##itor\ncommunion\nreggae\ninactivated\nlogs\nloads\n##houses\nhomosexual\n##iano\nale\ninforms\n##cas\nphrases\nplaster\nlinebacker\nambrose\nkaiser\nfascinated\n850\nlimerick\nrecruitment\nforge\nmastered\n##nding\nleinster\nrooted\nthreaten\n##strom\nborneo\n##hes\nsuggestions\nscholarships\npropeller\ndocumentaries\npatronage\ncoats\nconstructing\ninvest\nneurons\ncomet\nentirety\nshouts\nidentities\nannoying\
nunchanged\nwary\n##antly\n##ogy\nneat\noversight\n##kos\nphillies\nreplay\nconstance\n##kka\nincarnation\nhumble\nskies\nminus\n##acy\nsmithsonian\n##chel\nguerrilla\njar\ncadets\n##plate\nsurplus\naudit\n##aru\ncracking\njoanna\nlouisa\npacing\n##lights\nintentionally\n##iri\ndiner\nnwa\nimprint\naustralians\ntong\nunprecedented\nbunker\nnaive\nspecialists\nark\nnichols\nrailing\nleaked\npedal\n##uka\nshrub\nlonging\nroofs\nv8\ncaptains\nneural\ntuned\n##ntal\n##jet\nemission\nmedina\nfrantic\ncodex\ndefinitive\nsid\nabolition\nintensified\nstocks\nenrique\nsustain\ngenoa\noxide\n##written\nclues\ncha\n##gers\ntributaries\nfragment\nvenom\n##rity\n##ente\n##sca\nmuffled\nvain\nsire\nlaos\n##ingly\n##hana\nhastily\nsnapping\nsurfaced\nsentiment\nmotive\n##oft\ncontests\napproximate\nmesa\nluckily\ndinosaur\nexchanges\npropelled\naccord\nbourne\nrelieve\ntow\nmasks\noffended\n##ues\ncynthia\n##mmer\nrains\nbartender\nzinc\nreviewers\nlois\n##sai\nlegged\narrogant\nrafe\nrosie\ncomprise\nhandicap\nblockade\ninlet\nlagoon\ncopied\ndrilling\nshelley\npetals\n##inian\nmandarin\nobsolete\n##inated\nonward\narguably\nproductivity\ncindy\npraising\nseldom\nbusch\ndiscusses\nraleigh\nshortage\nranged\nstanton\nencouragement\nfirstly\nconceded\novers\ntemporal\n##uke\ncbe\n##bos\nwoo\ncertainty\npumps\n##pton\nstalked\n##uli\nlizzie\nperiodic\nthieves\nweaker\n##night\ngases\nshoving\nchooses\nwc\n##chemical\nprompting\nweights\n##kill\nrobust\nflanked\nsticky\nhu\ntuberculosis\n##eb\n##eal\nchristchurch\nresembled\nwallet\nreese\ninappropriate\npictured\ndistract\nfixing\nfiddle\ngiggled\nburger\nheirs\nhairy\nmechanic\ntorque\napache\nobsessed\nchiefly\ncheng\nlogging\n##tag\nextracted\nmeaningful\nnumb\n##vsky\ngloucestershire\nreminding\n##bay\nunite\n##lit\nbreeds\ndiminished\nclown\nglove\n1860s\n##ن\n##ug\narchibald\nfocal\nfreelance\nsliced\ndepiction\n##yk\norganism\nswitches\nsights\nstray\ncrawling\n##ril\nlever\nleningrad\ninterpretations\nloops\nanytime\nreel\na
licia\ndelighted\n##ech\ninhaled\nxiv\nsuitcase\nbernie\nvega\nlicenses\nnorthampton\nexclusion\ninduction\nmonasteries\nracecourse\nhomosexuality\n##right\n##sfield\n##rky\ndimitri\nmichele\nalternatives\nions\ncommentators\ngenuinely\nobjected\npork\nhospitality\nfencing\nstephan\nwarships\nperipheral\nwit\ndrunken\nwrinkled\nquentin\nspends\ndeparting\nchung\nnumerical\nspokesperson\n##zone\njohannesburg\ncaliber\nkillers\n##udge\nassumes\nneatly\ndemographic\nabigail\nbloc\n##vel\nmounting\n##lain\nbentley\nslightest\nxu\nrecipients\n##jk\nmerlin\n##writer\nseniors\nprisons\nblinking\nhindwings\nflickered\nkappa\n##hel\n80s\nstrengthening\nappealing\nbrewing\ngypsy\nmali\nlashes\nhulk\nunpleasant\nharassment\nbio\ntreaties\npredict\ninstrumentation\npulp\ntroupe\nboiling\nmantle\n##ffe\nins\n##vn\ndividing\nhandles\nverbs\n##onal\ncoconut\nsenegal\n340\nthorough\ngum\nmomentarily\n##sto\ncocaine\npanicked\ndestined\n##turing\nteatro\ndenying\nweary\ncaptained\nmans\n##hawks\n##code\nwakefield\nbollywood\nthankfully\n##16\ncyril\n##wu\namendments\n##bahn\nconsultation\nstud\nreflections\nkindness\n1787\ninternally\n##ovo\ntex\nmosaic\ndistribute\npaddy\nseeming\n143\n##hic\npiers\n##15\n##mura\n##verse\npopularly\nwinger\nkang\nsentinel\nmccoy\n##anza\ncovenant\n##bag\nverge\nfireworks\nsuppress\nthrilled\ndominate\n##jar\nswansea\n##60\n142\nreconciliation\n##ndi\nstiffened\ncue\ndorian\n##uf\ndamascus\namor\nida\nforemost\n##aga\nporsche\nunseen\ndir\n##had\n##azi\nstony\nlexi\nmelodies\n##nko\nangular\ninteger\npodcast\nants\ninherent\njaws\njustify\npersona\n##olved\njosephine\n##nr\n##ressed\ncustomary\nflashes\ngala\ncyrus\nglaring\nbackyard\nariel\nphysiology\ngreenland\nhtml\nstir\navon\natletico\nfinch\nmethodology\nked\n##lent\nmas\ncatholicism\ntownsend\nbranding\nquincy\nfits\ncontainers\n1777\nashore\naragon\n##19\nforearm\npoisoning\n##sd\nadopting\nconquer\ngrinding\namnesty\nkeller\nfinances\nevaluate\nforged\nlankan\ninstincts\n##uto\nguam\nbosni
an\nphotographed\nworkplace\ndesirable\nprotector\n##dog\nallocation\nintently\nencourages\nwilly\n##sten\nbodyguard\nelectro\nbrighter\n##ν\nbihar\n##chev\nlasts\nopener\namphibious\nsal\nverde\narte\n##cope\ncaptivity\nvocabulary\nyields\n##tted\nagreeing\ndesmond\npioneered\n##chus\nstrap\ncampaigned\nrailroads\n##ович\nemblem\n##dre\nstormed\n501\n##ulous\nmarijuana\nnorthumberland\n##gn\n##nath\nbowen\nlandmarks\nbeaumont\n##qua\ndanube\n##bler\nattorneys\nth\nge\nflyers\ncritique\nvillains\ncass\nmutation\nacc\n##0s\ncolombo\nmckay\nmotif\nsampling\nconcluding\nsyndicate\n##rell\nneon\nstables\nds\nwarnings\nclint\nmourning\nwilkinson\n##tated\nmerrill\nleopard\nevenings\nexhaled\nemil\nsonia\nezra\ndiscrete\nstove\nfarrell\nfifteenth\nprescribed\nsuperhero\n##rier\nworms\nhelm\nwren\n##duction\n##hc\nexpo\n##rator\nhq\nunfamiliar\nantony\nprevents\nacceleration\nfiercely\nmari\npainfully\ncalculations\ncheaper\nign\nclifton\nirvine\ndavenport\nmozambique\n##np\npierced\n##evich\nwonders\n##wig\n##cate\n##iling\ncrusade\nware\n##uel\nenzymes\nreasonably\nmls\n##coe\nmater\nambition\nbunny\neliot\nkernel\n##fin\nasphalt\nheadmaster\ntorah\naden\nlush\npins\nwaived\n##care\n##yas\njoao\nsubstrate\nenforce\n##grad\n##ules\nalvarez\nselections\nepidemic\ntempted\n##bit\nbremen\ntranslates\nensured\nwaterfront\n29th\nforrest\nmanny\nmalone\nkramer\nreigning\ncookies\nsimpler\nabsorption\n205\nengraved\n##ffy\nevaluated\n1778\nhaze\n146\ncomforting\ncrossover\n##abe\nthorn\n##rift\n##imo\n##pop\nsuppression\nfatigue\ncutter\n##tr\n201\nwurttemberg\n##orf\nenforced\nhovering\nproprietary\ngb\nsamurai\nsyllable\nascent\nlacey\ntick\nlars\ntractor\nmerchandise\nrep\nbouncing\ndefendants\n##yre\nhuntington\n##ground\n##oko\nstandardized\n##hor\n##hima\nassassinated\nnu\npredecessors\nrainy\nliar\nassurance\nlyrical\n##uga\nsecondly\nflattened\nios\nparameter\nundercover\n##mity\nbordeaux\npunish\nridges\nmarkers\nexodus\ninactive\nhesitate\ndebbie\nnyc\npledge\nsavoy\nn
agar\noffset\norganist\n##tium\nhesse\nmarin\nconverting\n##iver\ndiagram\npropulsion\npu\nvalidity\nreverted\nsupportive\n##dc\nministries\nclans\nresponds\nproclamation\n##inae\n##ø\n##rea\nein\npleading\npatriot\nsf\nbirch\nislanders\nstrauss\nhates\n##dh\nbrandenburg\nconcession\nrd\n##ob\n1900s\nkillings\ntextbook\nantiquity\ncinematography\nwharf\nembarrassing\nsetup\ncreed\nfarmland\ninequality\ncentred\nsignatures\nfallon\n370\n##ingham\n##uts\nceylon\ngazing\ndirective\nlaurie\n##tern\nglobally\n##uated\n##dent\nallah\nexcavation\nthreads\n##cross\n148\nfrantically\nicc\nutilize\ndetermines\nrespiratory\nthoughtful\nreceptions\n##dicate\nmerging\nchandra\nseine\n147\nbuilders\nbuilds\ndiagnostic\ndev\nvisibility\ngoddamn\nanalyses\ndhaka\ncho\nproves\nchancel\nconcurrent\ncuriously\ncanadians\npumped\nrestoring\n1850s\nturtles\njaguar\nsinister\nspinal\ntraction\ndeclan\nvows\n1784\nglowed\ncapitalism\nswirling\ninstall\nuniversidad\n##lder\n##oat\nsoloist\n##genic\n##oor\ncoincidence\nbeginnings\nnissan\ndip\nresorts\ncaucasus\ncombustion\ninfectious\n##eno\npigeon\nserpent\n##itating\nconclude\nmasked\nsalad\njew\n##gr\nsurreal\ntoni\n##wc\nharmonica\n151\n##gins\n##etic\n##coat\nfishermen\nintending\nbravery\n##wave\nklaus\ntitan\nwembley\ntaiwanese\nransom\n40th\nincorrect\nhussein\neyelids\njp\ncooke\ndramas\nutilities\n##etta\n##print\neisenhower\nprincipally\ngranada\nlana\n##rak\nopenings\nconcord\n##bl\nbethany\nconnie\nmorality\nsega\n##mons\n##nard\nearnings\n##kara\n##cine\nwii\ncommunes\n##rel\ncoma\ncomposing\nsoftened\nsevered\ngrapes\n##17\nnguyen\nanalyzed\nwarlord\nhubbard\nheavenly\nbehave\nslovenian\n##hit\n##ony\nhailed\nfilmmakers\ntrance\ncaldwell\nskye\nunrest\ncoward\nlikelihood\n##aging\nbern\nsci\ntaliban\nhonolulu\npropose\n##wang\n1700\nbrowser\nimagining\ncobra\ncontributes\ndukes\ninstinctively\nconan\nviolinist\n##ores\naccessories\ngradual\n##amp\nquotes\nsioux\n##dating\nundertake\nintercepted\nsparkling\ncompressed\n139\nf
ungus\ntombs\nhaley\nimposing\nrests\ndegradation\nlincolnshire\nretailers\nwetlands\ntulsa\ndistributor\ndungeon\nnun\ngreenhouse\nconvey\natlantis\naft\nexits\noman\ndresser\nlyons\n##sti\njoking\neddy\njudgement\nomitted\ndigits\n##cts\n##game\njuniors\n##rae\ncents\nstricken\nune\n##ngo\nwizards\nweir\nbreton\nnan\ntechnician\nfibers\nliking\nroyalty\n##cca\n154\npersia\nterribly\nmagician\n##rable\n##unt\nvance\ncafeteria\nbooker\ncamille\nwarmer\n##static\nconsume\ncavern\ngaps\ncompass\ncontemporaries\nfoyer\nsoothing\ngraveyard\nmaj\nplunged\nblush\n##wear\ncascade\ndemonstrates\nordinance\n##nov\nboyle\n##lana\nrockefeller\nshaken\nbanjo\nizzy\n##ense\nbreathless\nvines\n##32\n##eman\nalterations\nchromosome\ndwellings\nfeudal\nmole\n153\ncatalonia\nrelics\ntenant\nmandated\n##fm\nfridge\nhats\nhonesty\npatented\nraul\nheap\ncruisers\naccusing\nenlightenment\ninfants\nwherein\nchatham\ncontractors\nzen\naffinity\nhc\nosborne\npiston\n156\ntraps\nmaturity\n##rana\nlagos\n##zal\npeering\n##nay\nattendant\ndealers\nprotocols\nsubset\nprospects\nbiographical\n##cre\nartery\n##zers\ninsignia\nnuns\nendured\n##eration\nrecommend\nschwartz\nserbs\nberger\ncromwell\ncrossroads\n##ctor\nenduring\nclasped\ngrounded\n##bine\nmarseille\ntwitched\nabel\nchoke\nhttps\ncatalyst\nmoldova\nitalians\n##tist\ndisastrous\nwee\n##oured\n##nti\nwwf\nnope\n##piration\n##asa\nexpresses\nthumbs\n167\n##nza\ncoca\n1781\ncheating\n##ption\nskipped\nsensory\nheidelberg\nspies\nsatan\ndangers\nsemifinal\n202\nbohemia\nwhitish\nconfusing\nshipbuilding\nrelies\nsurgeons\nlandings\nravi\nbaku\nmoor\nsuffix\nalejandro\n##yana\nlitre\nupheld\n##unk\nrajasthan\n##rek\ncoaster\ninsists\nposture\nscenarios\netienne\nfavoured\nappoint\ntransgender\nelephants\npoked\ngreenwood\ndefences\nfulfilled\nmilitant\nsomali\n1758\nchalk\npotent\n##ucci\nmigrants\nwink\nassistants\nnos\nrestriction\nactivism\nniger\n##ario\ncolon\nshaun\n##sat\ndaphne\n##erated\nswam\ncongregations\nreprise\nconsideration
s\nmagnet\nplayable\nxvi\n##р\noverthrow\ntobias\nknob\nchavez\ncoding\n##mers\npropped\nkatrina\norient\nnewcomer\n##suke\ntemperate\n##pool\nfarmhouse\ninterrogation\n##vd\ncommitting\n##vert\nforthcoming\nstrawberry\njoaquin\nmacau\nponds\nshocking\nsiberia\n##cellular\nchant\ncontributors\n##nant\n##ologists\nsped\nabsorb\nhail\n1782\nspared\n##hore\nbarbados\nkarate\nopus\noriginates\nsaul\n##xie\nevergreen\nleaped\n##rock\ncorrelation\nexaggerated\nweekday\nunification\nbump\ntracing\nbrig\nafb\npathways\nutilizing\n##ners\nmod\nmb\ndisturbance\nkneeling\n##stad\n##guchi\n100th\npune\n##thy\ndecreasing\n168\nmanipulation\nmiriam\nacademia\necosystem\noccupational\nrbi\n##lem\nrift\n##14\nrotary\nstacked\nincorporation\nawakening\ngenerators\nguerrero\nracist\n##omy\ncyber\nderivatives\nculminated\nallie\nannals\npanzer\nsainte\nwikipedia\npops\nzu\naustro\n##vate\nalgerian\npolitely\nnicholson\nmornings\neducate\ntastes\nthrill\ndartmouth\n##gating\ndb\n##jee\nregan\ndiffering\nconcentrating\nchoreography\ndivinity\n##media\npledged\nalexandre\nrouting\ngregor\nmadeline\n##idal\napocalypse\n##hora\ngunfire\nculminating\nelves\nfined\nliang\nlam\nprogrammed\ntar\nguessing\ntransparency\ngabrielle\n##gna\ncancellation\nflexibility\n##lining\naccession\nshea\nstronghold\nnets\nspecializes\n##rgan\nabused\nhasan\nsgt\nling\nexceeding\n##₄\nadmiration\nsupermarket\n##ark\nphotographers\nspecialised\ntilt\nresonance\nhmm\nperfume\n380\nsami\nthreatens\ngarland\nbotany\nguarding\nboiled\ngreet\npuppy\nrusso\nsupplier\nwilmington\nvibrant\nvijay\n##bius\nparalympic\ngrumbled\npaige\nfaa\nlicking\nmargins\nhurricanes\n##gong\nfest\ngrenade\nripping\n##uz\ncounseling\nweigh\n##sian\nneedles\nwiltshire\nedison\ncostly\n##not\nfulton\ntramway\nredesigned\nstaffordshire\ncache\ngasping\nwatkins\nsleepy\ncandidacy\n##group\nmonkeys\ntimeline\nthrobbing\n##bid\n##sos\nberth\nuzbekistan\nvanderbilt\nbothering\noverturned\nballots\ngem\n##iger\nsunglasses\nsubscribers\nhooker\
ncompelling\nang\nexceptionally\nsaloon\nstab\n##rdi\ncarla\nterrifying\nrom\n##vision\ncoil\n##oids\nsatisfying\nvendors\n31st\nmackay\ndeities\noverlooked\nambient\nbahamas\nfelipe\nolympia\nwhirled\nbotanist\nadvertised\ntugging\n##dden\ndisciples\nmorales\nunionist\nrites\nfoley\nmorse\nmotives\ncreepy\n##₀\nsoo\n##sz\nbargain\nhighness\nfrightening\nturnpike\ntory\nreorganization\n##cer\ndepict\nbiographer\n##walk\nunopposed\nmanifesto\n##gles\ninstitut\nemile\naccidental\nkapoor\n##dam\nkilkenny\ncortex\nlively\n##13\nromanesque\njain\nshan\ncannons\n##ood\n##ske\npetrol\nechoing\namalgamated\ndisappears\ncautious\nproposes\nsanctions\ntrenton\n##ر\nflotilla\naus\ncontempt\ntor\ncanary\ncote\ntheirs\n##hun\nconceptual\ndeleted\nfascinating\npaso\nblazing\nelf\nhonourable\nhutchinson\n##eiro\n##outh\n##zin\nsurveyor\ntee\namidst\nwooded\nreissue\nintro\n##ono\ncobb\nshelters\nnewsletter\nhanson\nbrace\nencoding\nconfiscated\ndem\ncaravan\nmarino\nscroll\nmelodic\ncows\nimam\n##adi\n##aneous\nnorthward\nsearches\nbiodiversity\ncora\n310\nroaring\n##bers\nconnell\ntheologian\nhalo\ncompose\npathetic\nunmarried\ndynamo\n##oot\naz\ncalculation\ntoulouse\ndeserves\nhumour\nnr\nforgiveness\ntam\nundergone\nmartyr\npamela\nmyths\nwhore\ncounselor\nhicks\n290\nheavens\nbattleship\nelectromagnetic\n##bbs\nstellar\nestablishments\npresley\nhopped\n##chin\ntemptation\n90s\nwills\nnas\n##yuan\nnhs\n##nya\nseminars\n##yev\nadaptations\ngong\nasher\nlex\nindicator\nsikh\ntobago\ncites\ngoin\n##yte\nsatirical\n##gies\ncharacterised\ncorrespond\nbubbles\nlure\nparticipates\n##vid\neruption\nskate\ntherapeutic\n1785\ncanals\nwholesale\ndefaulted\nsac\n460\npetit\n##zzled\nvirgil\nleak\nravens\n256\nportraying\n##yx\nghetto\ncreators\ndams\nportray\nvicente\n##rington\nfae\nnamesake\nbounty\n##arium\njoachim\n##ota\n##iser\naforementioned\naxle\nsnout\ndepended\ndismantled\nreuben\n480\n##ibly\ngallagher\n##lau\n##pd\nearnest\n##ieu\n##iary\ninflicted\nobjections\n##llar\nasa\ng
ritted\n##athy\njericho\n##sea\n##was\nflick\nunderside\nceramics\nundead\nsubstituted\n195\neastward\nundoubtedly\nwheeled\nchimney\n##iche\nguinness\ncb\n##ager\nsiding\n##bell\ntraitor\nbaptiste\ndisguised\ninauguration\n149\ntipperary\nchoreographer\nperched\nwarmed\nstationary\neco\n##ike\n##ntes\nbacterial\n##aurus\nflores\nphosphate\n##core\nattacker\ninvaders\nalvin\nintersects\na1\nindirectly\nimmigrated\nbusinessmen\ncornelius\nvalves\nnarrated\npill\nsober\nul\nnationale\nmonastic\napplicants\nscenery\n##jack\n161\nmotifs\nconstitutes\ncpu\n##osh\njurisdictions\nsd\ntuning\nirritation\nwoven\n##uddin\nfertility\ngao\n##erie\nantagonist\nimpatient\nglacial\nhides\nboarded\ndenominations\ninterception\n##jas\ncookie\nnicola\n##tee\nalgebraic\nmarquess\nbahn\nparole\nbuyers\nbait\nturbines\npaperwork\nbestowed\nnatasha\nrenee\noceans\npurchases\n157\nvaccine\n215\n##tock\nfixtures\nplayhouse\nintegrate\njai\noswald\nintellectuals\n##cky\nbooked\nnests\nmortimer\n##isi\nobsession\nsept\n##gler\n##sum\n440\nscrutiny\nsimultaneous\nsquinted\n##shin\ncollects\noven\nshankar\npenned\nremarkably\n##я\nslips\nluggage\nspectral\n1786\ncollaborations\nlouie\nconsolidation\n##ailed\n##ivating\n420\nhoover\nblackpool\nharness\nignition\nvest\ntails\nbelmont\nmongol\nskinner\n##nae\nvisually\nmage\nderry\n##tism\n##unce\nstevie\ntransitional\n##rdy\nredskins\ndrying\nprep\nprospective\n##21\nannoyance\noversee\n##loaded\nfills\n##books\n##iki\nannounces\nfda\nscowled\nrespects\nprasad\nmystic\ntucson\n##vale\nrevue\nspringer\nbankrupt\n1772\naristotle\nsalvatore\nhabsburg\n##geny\ndal\nnatal\nnut\npod\nchewing\ndarts\nmoroccan\nwalkover\nrosario\nlenin\npunjabi\n##ße\ngrossed\nscattering\nwired\ninvasive\nhui\npolynomial\ncorridors\nwakes\ngina\nportrays\n##cratic\narid\nretreating\nerich\nirwin\nsniper\n##dha\nlinen\nlindsey\nmaneuver\nbutch\nshutting\nsocio\nbounce\ncommemorative\npostseason\njeremiah\npines\n275\nmystical\nbeads\nbp\nabbas\nfurnace\nbidding\nconsulte
d\nassaulted\nempirical\nrubble\nenclosure\nsob\nweakly\ncancel\npolly\nyielded\n##emann\ncurly\nprediction\nbattered\n70s\nvhs\njacqueline\nrender\nsails\nbarked\ndetailing\ngrayson\nriga\nsloane\nraging\n##yah\nherbs\nbravo\n##athlon\nalloy\ngiggle\nimminent\nsuffers\nassumptions\nwaltz\n##itate\naccomplishments\n##ited\nbathing\nremixed\ndeception\nprefix\n##emia\ndeepest\n##tier\n##eis\nbalkan\nfrogs\n##rong\nslab\n##pate\nphilosophers\npeterborough\ngrains\nimports\ndickinson\nrwanda\n##atics\n1774\ndirk\nlan\ntablets\n##rove\nclone\n##rice\ncaretaker\nhostilities\nmclean\n##gre\nregimental\ntreasures\nnorms\nimpose\ntsar\ntango\ndiplomacy\nvariously\ncomplain\n192\nrecognise\narrests\n1779\ncelestial\npulitzer\n##dus\nbing\nlibretto\n##moor\nadele\nsplash\n##rite\nexpectation\nlds\nconfronts\n##izer\nspontaneous\nharmful\nwedge\nentrepreneurs\nbuyer\n##ope\nbilingual\ntranslate\nrugged\nconner\ncirculated\nuae\neaton\n##gra\n##zzle\nlingered\nlockheed\nvishnu\nreelection\nalonso\n##oom\njoints\nyankee\nheadline\ncooperate\nheinz\nlaureate\ninvading\n##sford\nechoes\nscandinavian\n##dham\nhugging\nvitamin\nsalute\nmicah\nhind\ntrader\n##sper\nradioactive\n##ndra\nmilitants\npoisoned\nratified\nremark\ncampeonato\ndeprived\nwander\nprop\n##dong\noutlook\n##tani\n##rix\n##eye\nchiang\ndarcy\n##oping\nmandolin\nspice\nstatesman\nbabylon\n182\nwalled\nforgetting\nafro\n##cap\n158\ngiorgio\nbuffer\n##polis\nplanetary\n##gis\noverlap\nterminals\nkinda\ncentenary\n##bir\narising\nmanipulate\nelm\nke\n1770\nak\n##tad\nchrysler\nmapped\nmoose\npomeranian\nquad\nmacarthur\nassemblies\nshoreline\nrecalls\nstratford\n##rted\nnoticeable\n##evic\nimp\n##rita\n##sque\naccustomed\nsupplying\ntents\ndisgusted\nvogue\nsipped\nfilters\nkhz\nreno\nselecting\nluftwaffe\nmcmahon\ntyne\nmasterpiece\ncarriages\ncollided\ndunes\nexercised\nflare\nremembers\nmuzzle\n##mobile\nheck\n##rson\nburgess\nlunged\nmiddleton\nboycott\nbilateral\n##sity\nhazardous\nlumpur\nmultiplayer\nspotlight\
njackets\ngoldman\nliege\nporcelain\nrag\nwaterford\nbenz\nattracts\nhopeful\nbattling\nottomans\nkensington\nbaked\nhymns\ncheyenne\nlattice\nlevine\nborrow\npolymer\nclashes\nmichaels\nmonitored\ncommitments\ndenounced\n##25\n##von\ncavity\n##oney\nhobby\nakin\n##holders\nfutures\nintricate\ncornish\npatty\n##oned\nillegally\ndolphin\n##lag\nbarlow\nyellowish\nmaddie\napologized\nluton\nplagued\n##puram\nnana\n##rds\nsway\nfanny\nłodz\n##rino\npsi\nsuspicions\nhanged\n##eding\ninitiate\ncharlton\n##por\nnak\ncompetent\n235\nanalytical\nannex\nwardrobe\nreservations\n##rma\nsect\n162\nfairfax\nhedge\npiled\nbuckingham\nuneven\nbauer\nsimplicity\nsnyder\ninterpret\naccountability\ndonors\nmoderately\nbyrd\ncontinents\n##cite\n##max\ndisciple\nhr\njamaican\nping\nnominees\n##uss\nmongolian\ndiver\nattackers\neagerly\nideological\npillows\nmiracles\napartheid\nrevolver\nsulfur\nclinics\nmoran\n163\n##enko\nile\nkaty\nrhetoric\n##icated\nchronology\nrecycling\n##hrer\nelongated\nmughal\npascal\nprofiles\nvibration\ndatabases\ndomination\n##fare\n##rant\nmatthias\ndigest\nrehearsal\npolling\nweiss\ninitiation\nreeves\nclinging\nflourished\nimpress\nngo\n##hoff\n##ume\nbuckley\nsymposium\nrhythms\nweed\nemphasize\ntransforming\n##taking\n##gence\n##yman\naccountant\nanalyze\nflicker\nfoil\npriesthood\nvoluntarily\ndecreases\n##80\n##hya\nslater\nsv\ncharting\nmcgill\n##lde\nmoreno\n##iu\nbesieged\nzur\nrobes\n##phic\nadmitting\napi\ndeported\nturmoil\npeyton\nearthquakes\n##ares\nnationalists\nbeau\nclair\nbrethren\ninterrupt\nwelch\ncurated\ngalerie\nrequesting\n164\n##ested\nimpending\nsteward\nviper\n##vina\ncomplaining\nbeautifully\nbrandy\nfoam\nnl\n1660\n##cake\nalessandro\npunches\nlaced\nexplanations\n##lim\nattribute\nclit\nreggie\ndiscomfort\n##cards\nsmoothed\nwhales\n##cene\nadler\ncountered\nduffy\ndisciplinary\nwidening\nrecipe\nreliance\nconducts\ngoats\ngradient\npreaching\n##shaw\nmatilda\nquasi\nstriped\nmeridian\ncannabis\ncordoba\ncertificates\n##agh\
n##tering\ngraffiti\nhangs\npilgrims\nrepeats\n##ych\nrevive\nurine\netat\n##hawk\nfueled\nbelts\nfuzzy\nsusceptible\n##hang\nmauritius\nsalle\nsincere\nbeers\nhooks\n##cki\narbitration\nentrusted\nadvise\nsniffed\nseminar\njunk\ndonnell\nprocessors\nprincipality\nstrapped\ncelia\nmendoza\neverton\nfortunes\nprejudice\nstarving\nreassigned\nsteamer\n##lund\ntuck\nevenly\nforeman\n##ffen\ndans\n375\nenvisioned\nslit\n##xy\nbaseman\nliberia\nrosemary\n##weed\nelectrified\nperiodically\npotassium\nstride\ncontexts\nsperm\nslade\nmariners\ninflux\nbianca\nsubcommittee\n##rane\nspilling\nicao\nestuary\n##nock\ndelivers\niphone\n##ulata\nisa\nmira\nbohemian\ndessert\n##sbury\nwelcoming\nproudly\nslowing\n##chs\nmusee\nascension\nruss\n##vian\nwaits\n##psy\nafricans\nexploit\n##morphic\ngov\neccentric\ncrab\npeck\n##ull\nentrances\nformidable\nmarketplace\ngroom\nbolted\nmetabolism\npatton\nrobbins\ncourier\npayload\nendure\n##ifier\nandes\nrefrigerator\n##pr\nornate\n##uca\nruthless\nillegitimate\nmasonry\nstrasbourg\nbikes\nadobe\n##³\napples\nquintet\nwillingly\nniche\nbakery\ncorpses\nenergetic\n##cliffe\n##sser\n##ards\n177\ncentimeters\ncentro\nfuscous\ncretaceous\nrancho\n##yde\nandrei\ntelecom\ntottenham\noasis\nordination\nvulnerability\npresiding\ncorey\ncp\npenguins\nsims\n##pis\nmalawi\npiss\n##48\ncorrection\n##cked\n##ffle\n##ryn\ncountdown\ndetectives\npsychiatrist\npsychedelic\ndinosaurs\nblouse\n##get\nchoi\nvowed\n##oz\nrandomly\n##pol\n49ers\nscrub\nblanche\nbruins\ndusseldorf\n##using\nunwanted\n##ums\n212\ndominique\nelevations\nheadlights\nom\nlaguna\n##oga\n1750\nfamously\nignorance\nshrewsbury\n##aine\najax\nbreuning\nche\nconfederacy\ngreco\noverhaul\n##screen\npaz\nskirts\ndisagreement\ncruelty\njagged\nphoebe\nshifter\nhovered\nviruses\n##wes\nmandy\n##lined\n##gc\nlandlord\nsquirrel\ndashed\n##ι\nornamental\ngag\nwally\ngrange\nliteral\nspurs\nundisclosed\nproceeding\nyin\n##text\nbillie\norphan\nspanned\nhumidity\nindy\nweighted\npresentations\
nexplosions\nlucian\n##tary\nvaughn\nhindus\n##anga\n##hell\npsycho\n171\ndaytona\nprotects\nefficiently\nrematch\nsly\ntandem\n##oya\nrebranded\nimpaired\nhee\nmetropolis\npeach\ngodfrey\ndiaspora\nethnicity\nprosperous\ngleaming\ndar\ngrossing\nplayback\n##rden\nstripe\npistols\n##tain\nbirths\nlabelled\n##cating\n172\nrudy\nalba\n##onne\naquarium\nhostility\n##gb\n##tase\nshudder\nsumatra\nhardest\nlakers\nconsonant\ncreeping\ndemos\nhomicide\ncapsule\nzeke\nliberties\nexpulsion\npueblo\n##comb\ntrait\ntransporting\n##ddin\n##neck\n##yna\ndepart\ngregg\nmold\nledge\nhangar\noldham\nplayboy\ntermination\nanalysts\ngmbh\nromero\n##itic\ninsist\ncradle\nfilthy\nbrightness\nslash\nshootout\ndeposed\nbordering\n##truct\nisis\nmicrowave\ntumbled\nsheltered\ncathy\nwerewolves\nmessy\nandersen\nconvex\nclapped\nclinched\nsatire\nwasting\nedo\nvc\nrufus\n##jak\nmont\n##etti\npoznan\n##keeping\nrestructuring\ntransverse\n##rland\nazerbaijani\nslovene\ngestures\nroommate\nchoking\nshear\n##quist\nvanguard\noblivious\n##hiro\ndisagreed\nbaptism\n##lich\ncoliseum\n##aceae\nsalvage\nsociete\ncory\nlocke\nrelocation\nrelying\nversailles\nahl\nswelling\n##elo\ncheerful\n##word\n##edes\ngin\nsarajevo\nobstacle\ndiverted\n##nac\nmessed\nthoroughbred\nfluttered\nutrecht\nchewed\nacquaintance\nassassins\ndispatch\nmirza\n##wart\nnike\nsalzburg\nswell\nyen\n##gee\nidle\nligue\nsamson\n##nds\n##igh\nplayful\nspawned\n##cise\ntease\n##case\nburgundy\n##bot\nstirring\nskeptical\ninterceptions\nmarathi\n##dies\nbedrooms\naroused\npinch\n##lik\npreferences\ntattoos\nbuster\ndigitally\nprojecting\nrust\n##ital\nkitten\npriorities\naddison\npseudo\n##guard\ndusk\nicons\nsermon\n##psis\n##iba\nbt\n##lift\n##xt\nju\ntruce\nrink\n##dah\n##wy\ndefects\npsychiatry\noffences\ncalculate\nglucose\n##iful\n##rized\n##unda\nfrancaise\n##hari\nrichest\nwarwickshire\ncarly\n1763\npurity\nredemption\nlending\n##cious\nmuse\nbruises\ncerebral\naero\ncarving\n##name\npreface\nterminology\ninvade\nmonty\n#
#int\nanarchist\nblurred\n##iled\nrossi\ntreats\nguts\nshu\nfoothills\nballads\nundertaking\npremise\ncecilia\naffiliates\nblasted\nconditional\nwilder\nminors\ndrone\nrudolph\nbuffy\nswallowing\nhorton\nattested\n##hop\nrutherford\nhowell\nprimetime\nlivery\npenal\n##bis\nminimize\nhydro\nwrecked\nwrought\npalazzo\n##gling\ncans\nvernacular\nfriedman\nnobleman\nshale\nwalnut\ndanielle\n##ection\n##tley\nsears\n##kumar\nchords\nlend\nflipping\nstreamed\npor\ndracula\ngallons\nsacrifices\ngamble\norphanage\n##iman\nmckenzie\n##gible\nboxers\ndaly\n##balls\n##ان\n208\n##ific\n##rative\n##iq\nexploited\nslated\n##uity\ncircling\nhillary\npinched\ngoldberg\nprovost\ncampaigning\nlim\npiles\nironically\njong\nmohan\nsuccessors\nusaf\n##tem\n##ught\nautobiographical\nhaute\npreserves\n##ending\nacquitted\ncomparisons\n203\nhydroelectric\ngangs\ncypriot\ntorpedoes\nrushes\nchrome\nderive\nbumps\ninstability\nfiat\npets\n##mbe\nsilas\ndye\nreckless\nsettler\n##itation\ninfo\nheats\n##writing\n176\ncanonical\nmaltese\nfins\nmushroom\nstacy\naspen\navid\n##kur\n##loading\nvickers\ngaston\nhillside\nstatutes\nwilde\ngail\nkung\nsabine\ncomfortably\nmotorcycles\n##rgo\n169\npneumonia\nfetch\n##sonic\naxel\nfaintly\nparallels\n##oop\nmclaren\nspouse\ncompton\ninterdisciplinary\nminer\n##eni\n181\nclamped\n##chal\n##llah\nseparates\nversa\n##mler\nscarborough\nlabrador\n##lity\n##osing\nrutgers\nhurdles\ncomo\n166\nburt\ndivers\n##100\nwichita\ncade\ncoincided\n##erson\nbruised\nmla\n##pper\nvineyard\n##ili\n##brush\nnotch\nmentioning\njase\nhearted\nkits\ndoe\n##acle\npomerania\n##ady\nronan\nseizure\npavel\nproblematic\n##zaki\ndomenico\n##ulin\ncatering\npenelope\ndependence\nparental\nemilio\nministerial\natkinson\n##bolic\nclarkson\nchargers\ncolby\ngrill\npeeked\narises\nsummon\n##aged\nfools\n##grapher\nfaculties\nqaeda\n##vial\ngarner\nrefurbished\n##hwa\ngeelong\ndisasters\nnudged\nbs\nshareholder\nlori\nalgae\nreinstated\nrot\n##ades\n##nous\ninvites\nstainless\n183\nin
clusive\n##itude\ndiocesan\ntil\n##icz\ndenomination\n##xa\nbenton\nfloral\nregisters\n##ider\n##erman\n##kell\nabsurd\nbrunei\nguangzhou\nhitter\nretaliation\n##uled\n##eve\nblanc\nnh\nconsistency\ncontamination\n##eres\n##rner\ndire\npalermo\nbroadcasters\ndiaries\ninspire\nvols\nbrewer\ntightening\nky\nmixtape\nhormone\n##tok\nstokes\n##color\n##dly\n##ssi\npg\n##ometer\n##lington\nsanitation\n##tility\nintercontinental\napps\n##adt\n¹⁄₂\ncylinders\neconomies\nfavourable\nunison\ncroix\ngertrude\nodyssey\nvanity\ndangling\n##logists\nupgrades\ndice\nmiddleweight\npractitioner\n##ight\n206\nhenrik\nparlor\norion\nangered\nlac\npython\nblurted\n##rri\nsensual\nintends\nswings\nangled\n##phs\nhusky\nattain\npeerage\nprecinct\ntextiles\ncheltenham\nshuffled\ndai\nconfess\ntasting\nbhutan\n##riation\ntyrone\nsegregation\nabrupt\nruiz\n##rish\nsmirked\nblackwell\nconfidential\nbrowning\namounted\n##put\nvase\nscarce\nfabulous\nraided\nstaple\nguyana\nunemployed\nglider\nshay\n##tow\ncarmine\ntroll\nintervene\nsquash\nsuperstar\n##uce\ncylindrical\nlen\nroadway\nresearched\nhandy\n##rium\n##jana\nmeta\nlao\ndeclares\n##rring\n##tadt\n##elin\n##kova\nwillem\nshrubs\nnapoleonic\nrealms\nskater\nqi\nvolkswagen\n##ł\ntad\nhara\narchaeologist\nawkwardly\neerie\n##kind\nwiley\n##heimer\n##24\ntitus\norganizers\ncfl\ncrusaders\nlama\nusb\nvent\nenraged\nthankful\noccupants\nmaximilian\n##gaard\npossessing\ntextbooks\n##oran\ncollaborator\nquaker\n##ulo\navalanche\nmono\nsilky\nstraits\nisaiah\nmustang\nsurged\nresolutions\npotomac\ndescend\ncl\nkilograms\nplato\nstrains\nsaturdays\n##olin\nbernstein\n##ype\nholstein\nponytail\n##watch\nbelize\nconversely\nheroine\nperpetual\n##ylus\ncharcoal\npiedmont\nglee\nnegotiating\nbackdrop\nprologue\n##jah\n##mmy\npasadena\nclimbs\nramos\nsunni\n##holm\n##tner\n##tri\nanand\ndeficiency\nhertfordshire\nstout\n##avi\naperture\norioles\n##irs\ndoncaster\nintrigued\nbombed\ncoating\notis\n##mat\ncocktail\n##jit\n##eto\namir\narousal\nsar\n#
#proof\n##act\n##ories\ndixie\npots\n##bow\nwhereabouts\n159\n##fted\ndrains\nbullying\ncottages\nscripture\ncoherent\nfore\npoe\nappetite\n##uration\nsampled\n##ators\n##dp\nderrick\nrotor\njays\npeacock\ninstallment\n##rro\nadvisors\n##coming\nrodeo\nscotch\n##mot\n##db\n##fen\n##vant\nensued\nrodrigo\ndictatorship\nmartyrs\ntwenties\n##н\ntowed\nincidence\nmarta\nrainforest\nsai\nscaled\n##cles\noceanic\nqualifiers\nsymphonic\nmcbride\ndislike\ngeneralized\naubrey\ncolonization\n##iation\n##lion\n##ssing\ndisliked\nlublin\nsalesman\n##ulates\nspherical\nwhatsoever\nsweating\navalon\ncontention\npunt\nseverity\nalderman\natari\n##dina\n##grant\n##rop\nscarf\nseville\nvertices\nannexation\nfairfield\nfascination\ninspiring\nlaunches\npalatinate\nregretted\n##rca\nferal\n##iom\nelk\nnap\nolsen\nreddy\nyong\n##leader\n##iae\ngarment\ntransports\nfeng\ngracie\noutrage\nviceroy\ninsides\n##esis\nbreakup\ngrady\norganizer\nsofter\ngrimaced\n222\nmurals\ngalicia\narranging\nvectors\n##rsten\nbas\n##sb\n##cens\nsloan\n##eka\nbitten\nara\nfender\nnausea\nbumped\nkris\nbanquet\ncomrades\ndetector\npersisted\n##llan\nadjustment\nendowed\ncinemas\n##shot\nsellers\n##uman\npeek\nepa\nkindly\nneglect\nsimpsons\ntalon\nmausoleum\nrunaway\nhangul\nlookout\n##cic\nrewards\ncoughed\nacquainted\nchloride\n##ald\nquicker\naccordion\nneolithic\n##qa\nartemis\ncoefficient\nlenny\npandora\ntx\n##xed\necstasy\nlitter\nsegunda\nchairperson\ngemma\nhiss\nrumor\nvow\nnasal\nantioch\ncompensate\npatiently\ntransformers\n##eded\njudo\nmorrow\npenis\nposthumous\nphilips\nbandits\nhusbands\ndenote\nflaming\n##any\n##phones\nlangley\nyorker\n1760\nwalters\n##uo\n##kle\ngubernatorial\nfatty\nsamsung\nleroy\noutlaw\n##nine\nunpublished\npoole\njakob\n##ᵢ\n##ₙ\ncrete\ndistorted\nsuperiority\n##dhi\nintercept\ncrust\nmig\nclaus\ncrashes\npositioning\n188\nstallion\n301\nfrontal\narmistice\n##estinal\nelton\naj\nencompassing\ncamel\ncommemorated\nmalaria\nwoodward\ncalf\ncigar\npenetrate\n##oso\nwill
ard\n##rno\n##uche\nillustrate\namusing\nconvergence\nnoteworthy\n##lma\n##rva\njourneys\nrealise\nmanfred\n##sable\n410\n##vocation\nhearings\nfiance\n##posed\neducators\nprovoked\nadjusting\n##cturing\nmodular\nstockton\npaterson\nvlad\nrejects\nelectors\nselena\nmaureen\n##tres\nuber\n##rce\nswirled\n##num\nproportions\nnanny\npawn\nnaturalist\nparma\napostles\nawoke\nethel\nwen\n##bey\nmonsoon\noverview\n##inating\nmccain\nrendition\nrisky\nadorned\n##ih\nequestrian\ngermain\nnj\nconspicuous\nconfirming\n##yoshi\nshivering\n##imeter\nmilestone\nrumours\nflinched\nbounds\nsmacked\ntoken\n##bei\nlectured\nautomobiles\n##shore\nimpacted\n##iable\nnouns\nnero\n##leaf\nismail\nprostitute\ntrams\n##lace\nbridget\nsud\nstimulus\nimpressions\nreins\nrevolves\n##oud\n##gned\ngiro\nhoneymoon\n##swell\ncriterion\n##sms\n##uil\nlibyan\nprefers\n##osition\n211\npreview\nsucks\naccusation\nbursts\nmetaphor\ndiffusion\ntolerate\nfaye\nbetting\ncinematographer\nliturgical\nspecials\nbitterly\nhumboldt\n##ckle\nflux\nrattled\n##itzer\narchaeologists\nodor\nauthorised\nmarshes\ndiscretion\n##ов\nalarmed\narchaic\ninverse\n##leton\nexplorers\n##pine\ndrummond\ntsunami\nwoodlands\n##minate\n##tland\nbooklet\ninsanity\nowning\ninsert\ncrafted\ncalculus\n##tore\nreceivers\n##bt\nstung\n##eca\n##nched\nprevailing\ntravellers\neyeing\nlila\ngraphs\n##borne\n178\njulien\n##won\nmorale\nadaptive\ntherapist\nerica\ncw\nlibertarian\nbowman\npitches\nvita\n##ional\ncrook\n##ads\n##entation\ncaledonia\nmutiny\n##sible\n1840s\nautomation\n##ß\nflock\n##pia\nironic\npathology\n##imus\nremarried\n##22\njoker\nwithstand\nenergies\n##att\nshropshire\nhostages\nmadeleine\ntentatively\nconflicting\nmateo\nrecipes\neuros\nol\nmercenaries\nnico\n##ndon\nalbuquerque\naugmented\nmythical\nbel\nfreud\n##child\ncough\n##lica\n365\nfreddy\nlillian\ngenetically\nnuremberg\ncalder\n209\nbonn\noutdoors\npaste\nsuns\nurgency\nvin\nrestraint\ntyson\n##cera\n##selle\nbarrage\nbethlehem\nkahn\n##par\nmounts\nnip
pon\nbarony\nhappier\nryu\nmakeshift\nsheldon\nblushed\ncastillo\nbarking\nlistener\ntaped\nbethel\nfluent\nheadlines\npornography\nrum\ndisclosure\nsighing\nmace\ndoubling\ngunther\nmanly\n##plex\nrt\ninterventions\nphysiological\nforwards\nemerges\n##tooth\n##gny\ncompliment\nrib\nrecession\nvisibly\nbarge\nfaults\nconnector\nexquisite\nprefect\n##rlin\npatio\n##cured\nelevators\nbrandt\nitalics\npena\n173\nwasp\nsatin\nea\nbotswana\ngraceful\nrespectable\n##jima\n##rter\n##oic\nfranciscan\ngenerates\n##dl\nalfredo\ndisgusting\n##olate\n##iously\nsherwood\nwarns\ncod\npromo\ncheryl\nsino\n##ة\n##escu\ntwitch\n##zhi\nbrownish\nthom\nortiz\n##dron\ndensely\n##beat\ncarmel\nreinforce\n##bana\n187\nanastasia\ndownhill\nvertex\ncontaminated\nremembrance\nharmonic\nhomework\n##sol\nfiancee\ngears\nolds\nangelica\nloft\nramsay\nquiz\ncolliery\nsevens\n##cape\nautism\n##hil\nwalkway\n##boats\nruben\nabnormal\nounce\nkhmer\n##bbe\nzachary\nbedside\nmorphology\npunching\n##olar\nsparrow\nconvinces\n##35\nhewitt\nqueer\nremastered\nrods\nmabel\nsolemn\nnotified\nlyricist\nsymmetric\n##xide\n174\nencore\npassports\nwildcats\n##uni\nbaja\n##pac\nmildly\n##ease\nbleed\ncommodity\nmounds\nglossy\norchestras\n##omo\ndamian\nprelude\nambitions\n##vet\nawhile\nremotely\n##aud\nasserts\nimply\n##iques\ndistinctly\nmodelling\nremedy\n##dded\nwindshield\ndani\nxiao\n##endra\naudible\npowerplant\n1300\ninvalid\nelemental\nacquisitions\n##hala\nimmaculate\nlibby\nplata\nsmuggling\nventilation\ndenoted\nminh\n##morphism\n430\ndiffered\ndion\nkelley\nlore\nmocking\nsabbath\nspikes\nhygiene\ndrown\nrunoff\nstylized\ntally\nliberated\naux\ninterpreter\nrighteous\naba\nsiren\nreaper\npearce\nmillie\n##cier\n##yra\ngaius\n##iso\ncaptures\n##ttering\ndorm\nclaudio\n##sic\nbenches\nknighted\nblackness\n##ored\ndiscount\nfumble\noxidation\nrouted\n##ς\nnovak\nperpendicular\nspoiled\nfracture\nsplits\n##urt\npads\ntopology\n##cats\naxes\nfortunate\noffenders\nprotestants\nesteem\n221\nbroadband\n
convened\nfrankly\nhound\nprototypes\nisil\nfacilitated\nkeel\n##sher\nsahara\nawaited\nbubba\norb\nprosecutors\n186\nhem\n520\n##xing\nrelaxing\nremnant\nromney\nsorted\nslalom\nstefano\nulrich\n##active\nexemption\nfolder\npauses\nfoliage\nhitchcock\nepithet\n204\ncriticisms\n##aca\nballistic\nbrody\nhinduism\nchaotic\nyouths\nequals\n##pala\npts\nthicker\nanalogous\ncapitalist\nimprovised\noverseeing\nsinatra\nascended\nbeverage\n##tl\nstraightforward\n##kon\ncurran\n##west\nbois\n325\ninduce\nsurveying\nemperors\nsax\nunpopular\n##kk\ncartoonist\nfused\n##mble\nunto\n##yuki\nlocalities\n##cko\n##ln\ndarlington\nslain\nacademie\nlobbying\nsediment\npuzzles\n##grass\ndefiance\ndickens\nmanifest\ntongues\nalumnus\narbor\ncoincide\n184\nappalachian\nmustafa\nexaminer\ncabaret\ntraumatic\nyves\nbracelet\ndraining\nheroin\nmagnum\nbaths\nodessa\nconsonants\nmitsubishi\n##gua\nkellan\nvaudeville\n##fr\njoked\nnull\nstraps\nprobation\n##ław\nceded\ninterfaces\n##pas\n##zawa\nblinding\nviet\n224\nrothschild\nmuseo\n640\nhuddersfield\n##vr\ntactic\n##storm\nbrackets\ndazed\nincorrectly\n##vu\nreg\nglazed\nfearful\nmanifold\nbenefited\nirony\n##sun\nstumbling\n##rte\nwillingness\nbalkans\nmei\nwraps\n##aba\ninjected\n##lea\ngu\nsyed\nharmless\n##hammer\nbray\ntakeoff\npoppy\ntimor\ncardboard\nastronaut\npurdue\nweeping\nsouthbound\ncursing\nstalls\ndiagonal\n##neer\nlamar\nbryce\ncomte\nweekdays\nharrington\n##uba\nnegatively\n##see\nlays\ngrouping\n##cken\n##henko\naffirmed\nhalle\nmodernist\n##lai\nhodges\nsmelling\naristocratic\nbaptized\ndismiss\njustification\noilers\n##now\ncoupling\nqin\nsnack\nhealer\n##qing\ngardener\nlayla\nbattled\nformulated\nstephenson\ngravitational\n##gill\n##jun\n1768\ngranny\ncoordinating\nsuites\n##cd\n##ioned\nmonarchs\n##cote\n##hips\nsep\nblended\napr\nbarrister\ndeposition\nfia\nmina\npolicemen\nparanoid\n##pressed\nchurchyard\ncovert\ncrumpled\ncreep\nabandoning\ntr\ntransmit\nconceal\nbarr\nunderstands\nreadiness\nspire\n##cology\n#
#enia\n##erry\n610\nstartling\nunlock\nvida\nbowled\nslots\n##nat\n##islav\nspaced\ntrusting\nadmire\nrig\n##ink\nslack\n##70\nmv\n207\ncasualty\n##wei\nclassmates\n##odes\n##rar\n##rked\namherst\nfurnished\nevolve\nfoundry\nmenace\nmead\n##lein\nflu\nwesleyan\n##kled\nmonterey\nwebber\n##vos\nwil\n##mith\n##на\nbartholomew\njustices\nrestrained\n##cke\namenities\n191\nmediated\nsewage\ntrenches\nml\nmainz\n##thus\n1800s\n##cula\n##inski\ncaine\nbonding\n213\nconverts\nspheres\nsuperseded\nmarianne\ncrypt\nsweaty\nensign\nhistoria\n##br\nspruce\n##post\n##ask\nforks\nthoughtfully\nyukon\npamphlet\names\n##uter\nkarma\n##yya\nbryn\nnegotiation\nsighs\nincapable\n##mbre\n##ntial\nactresses\ntaft\n##mill\nluce\nprevailed\n##amine\n1773\nmotionless\nenvoy\ntestify\ninvesting\nsculpted\ninstructors\nprovence\nkali\ncullen\nhorseback\n##while\ngoodwin\n##jos\ngaa\nnorte\n##ldon\nmodify\nwavelength\nabd\n214\nskinned\nsprinter\nforecast\nscheduling\nmarries\nsquared\ntentative\n##chman\nboer\n##isch\nbolts\nswap\nfisherman\nassyrian\nimpatiently\nguthrie\nmartins\nmurdoch\n194\ntanya\nnicely\ndolly\nlacy\nmed\n##45\nsyn\ndecks\nfashionable\nmillionaire\n##ust\nsurfing\n##ml\n##ision\nheaved\ntammy\nconsulate\nattendees\nroutinely\n197\nfuse\nsaxophonist\nbackseat\nmalaya\n##lord\nscowl\ntau\n##ishly\n193\nsighted\nsteaming\n##rks\n303\n911\n##holes\n##hong\nching\n##wife\nbless\nconserved\njurassic\nstacey\nunix\nzion\nchunk\nrigorous\nblaine\n198\npeabody\nslayer\ndismay\nbrewers\nnz\n##jer\ndet\n##glia\nglover\npostwar\nint\npenetration\nsylvester\nimitation\nvertically\nairlift\nheiress\nknoxville\nviva\n##uin\n390\nmacon\n##rim\n##fighter\n##gonal\njanice\n##orescence\n##wari\nmarius\nbelongings\nleicestershire\n196\nblanco\ninverted\npreseason\nsanity\nsobbing\n##due\n##elt\n##dled\ncollingwood\nregeneration\nflickering\nshortest\n##mount\n##osi\nfeminism\n##lat\nsherlock\ncabinets\nfumbled\nnorthbound\nprecedent\nsnaps\n##mme\nresearching\n##akes\nguillaume\ninsights
\nmanipulated\nvapor\nneighbour\nsap\ngangster\nfrey\nf1\nstalking\nscarcely\ncallie\nbarnett\ntendencies\naudi\ndoomed\nassessing\nslung\npanchayat\nambiguous\nbartlett\n##etto\ndistributing\nviolating\nwolverhampton\n##hetic\nswami\nhistoire\n##urus\nliable\npounder\ngroin\nhussain\nlarsen\npopping\nsurprises\n##atter\nvie\ncurt\n##station\nmute\nrelocate\nmusicals\nauthorization\nrichter\n##sef\nimmortality\ntna\nbombings\n##press\ndeteriorated\nyiddish\n##acious\nrobbed\ncolchester\ncs\npmid\nao\nverified\nbalancing\napostle\nswayed\nrecognizable\noxfordshire\nretention\nnottinghamshire\ncontender\njudd\ninvitational\nshrimp\nuhf\n##icient\ncleaner\nlongitudinal\ntanker\n##mur\nacronym\nbroker\nkoppen\nsundance\nsuppliers\n##gil\n4000\nclipped\nfuels\npetite\n##anne\nlandslide\nhelene\ndiversion\npopulous\nlandowners\nauspices\nmelville\nquantitative\n##xes\nferries\nnicky\n##llus\ndoo\nhaunting\nroche\ncarver\ndowned\nunavailable\n##pathy\napproximation\nhiroshima\n##hue\ngarfield\nvalle\ncomparatively\nkeyboardist\ntraveler\n##eit\ncongestion\ncalculating\nsubsidiaries\n##bate\nserb\nmodernization\nfairies\ndeepened\nville\naverages\n##lore\ninflammatory\ntonga\n##itch\nco₂\nsquads\n##hea\ngigantic\nserum\nenjoyment\nretailer\nverona\n35th\ncis\n##phobic\nmagna\ntechnicians\n##vati\narithmetic\n##sport\nlevin\n##dation\namtrak\nchow\nsienna\n##eyer\nbackstage\nentrepreneurship\n##otic\nlearnt\ntao\n##udy\nworcestershire\nformulation\nbaggage\nhesitant\nbali\nsabotage\n##kari\nbarren\nenhancing\nmurmur\npl\nfreshly\nputnam\nsyntax\naces\nmedicines\nresentment\nbandwidth\n##sier\ngrins\nchili\nguido\n##sei\nframing\nimplying\ngareth\nlissa\ngenevieve\npertaining\nadmissions\ngeo\nthorpe\nproliferation\nsato\nbela\nanalyzing\nparting\n##gor\nawakened\n##isman\nhuddled\nsecrecy\n##kling\nhush\ngentry\n540\ndungeons\n##ego\ncoasts\n##utz\nsacrificed\n##chule\nlandowner\nmutually\nprevalence\nprogrammer\nadolescent\ndisrupted\nseaside\ngee\ntrusts\nvamp\ngeorgie\n##
nesian\n##iol\nschedules\nsindh\n##market\netched\nhm\nsparse\nbey\nbeaux\nscratching\ngliding\nunidentified\n216\ncollaborating\ngems\njesuits\noro\naccumulation\nshaping\nmbe\nanal\n##xin\n231\nenthusiasts\nnewscast\n##egan\njanata\ndewey\nparkinson\n179\nankara\nbiennial\ntowering\ndd\ninconsistent\n950\n##chet\nthriving\nterminate\ncabins\nfuriously\neats\nadvocating\ndonkey\nmarley\nmuster\nphyllis\nleiden\n##user\ngrassland\nglittering\niucn\nloneliness\n217\nmemorandum\narmenians\n##ddle\npopularized\nrhodesia\n60s\nlame\n##illon\nsans\nbikini\nheader\norbits\n##xx\n##finger\n##ulator\nsharif\nspines\nbiotechnology\nstrolled\nnaughty\nyates\n##wire\nfremantle\nmilo\n##mour\nabducted\nremoves\n##atin\nhumming\nwonderland\n##chrome\n##ester\nhume\npivotal\n##rates\narmand\ngrams\nbelievers\nelector\nrte\napron\nbis\nscraped\n##yria\nendorsement\ninitials\n##llation\neps\ndotted\nhints\nbuzzing\nemigration\nnearer\n##tom\nindicators\n##ulu\ncoarse\nneutron\nprotectorate\n##uze\ndirectional\nexploits\npains\nloire\n1830s\nproponents\nguggenheim\nrabbits\nritchie\n305\nhectare\ninputs\nhutton\n##raz\nverify\n##ako\nboilers\nlongitude\n##lev\nskeletal\nyer\nemilia\ncitrus\ncompromised\n##gau\npokemon\nprescription\nparagraph\neduard\ncadillac\nattire\ncategorized\nkenyan\nweddings\ncharley\n##bourg\nentertain\nmonmouth\n##lles\nnutrients\ndavey\nmesh\nincentive\npractised\necosystems\nkemp\nsubdued\noverheard\n##rya\nbodily\nmaxim\n##nius\napprenticeship\nursula\n##fight\nlodged\nrug\nsilesian\nunconstitutional\npatel\ninspected\ncoyote\nunbeaten\n##hak\n34th\ndisruption\nconvict\nparcel\n##cl\n##nham\ncollier\nimplicated\nmallory\n##iac\n##lab\nsusannah\nwinkler\n##rber\nshia\nphelps\nsediments\ngraphical\nrobotic\n##sner\nadulthood\nmart\nsmoked\n##isto\nkathryn\nclarified\n##aran\ndivides\nconvictions\noppression\npausing\nburying\n##mt\nfederico\nmathias\neileen\n##tana\nkite\nhunched\n##acies\n189\n##atz\ndisadvantage\nliza\nkinetic\ngreedy\nparadox\nyokohama\
ndowager\ntrunks\nventured\n##gement\ngupta\nvilnius\nolaf\n##thest\ncrimean\nhopper\n##ej\nprogressively\narturo\nmouthed\narrondissement\n##fusion\nrubin\nsimulcast\noceania\n##orum\n##stra\n##rred\nbusiest\nintensely\nnavigator\ncary\n##vine\n##hini\n##bies\nfife\nrowe\nrowland\nposing\ninsurgents\nshafts\nlawsuits\nactivate\nconor\ninward\nculturally\ngarlic\n265\n##eering\neclectic\n##hui\n##kee\n##nl\nfurrowed\nvargas\nmeteorological\nrendezvous\n##aus\nculinary\ncommencement\n##dition\nquota\n##notes\nmommy\nsalaries\noverlapping\nmule\n##iology\n##mology\nsums\nwentworth\n##isk\n##zione\nmainline\nsubgroup\n##illy\nhack\nplaintiff\nverdi\nbulb\ndifferentiation\nengagements\nmultinational\nsupplemented\nbertrand\ncaller\nregis\n##naire\n##sler\n##arts\n##imated\nblossom\npropagation\nkilometer\nviaduct\nvineyards\n##uate\nbeckett\noptimization\ngolfer\nsongwriters\nseminal\nsemitic\nthud\nvolatile\nevolving\nridley\n##wley\ntrivial\ndistributions\nscandinavia\njiang\n##ject\nwrestled\ninsistence\n##dio\nemphasizes\nnapkin\n##ods\nadjunct\nrhyme\n##ricted\n##eti\nhopeless\nsurrounds\ntremble\n32nd\nsmoky\n##ntly\noils\nmedicinal\npadded\nsteer\nwilkes\n219\n255\nconcessions\nhue\nuniquely\nblinded\nlandon\nyahoo\n##lane\nhendrix\ncommemorating\ndex\nspecify\nchicks\n##ggio\nintercity\n1400\nmorley\n##torm\nhighlighting\n##oting\npang\noblique\nstalled\n##liner\nflirting\nnewborn\n1769\nbishopric\nshaved\n232\ncurrie\n##ush\ndharma\nspartan\n##ooped\nfavorites\nsmug\nnovella\nsirens\nabusive\ncreations\nespana\n##lage\nparadigm\nsemiconductor\nsheen\n##rdo\n##yen\n##zak\nnrl\nrenew\n##pose\n##tur\nadjutant\nmarches\nnorma\n##enity\nineffective\nweimar\ngrunt\n##gat\nlordship\nplotting\nexpenditure\ninfringement\nlbs\nrefrain\nav\nmimi\nmistakenly\npostmaster\n1771\n##bara\nras\nmotorsports\ntito\n199\nsubjective\n##zza\nbully\nstew\n##kaya\nprescott\n1a\n##raphic\n##zam\nbids\nstyling\nparanormal\nreeve\nsneaking\nexploding\nkatz\nakbar\nmigrant\nsyllables\nind
efinitely\n##ogical\ndestroys\nreplaces\napplause\n##phine\npest\n##fide\n218\narticulated\nbertie\n##thing\n##cars\n##ptic\ncourtroom\ncrowley\naesthetics\ncummings\ntehsil\nhormones\ntitanic\ndangerously\n##ibe\nstadion\njaenelle\nauguste\nciudad\n##chu\nmysore\npartisans\n##sio\nlucan\nphilipp\n##aly\ndebating\nhenley\ninteriors\n##rano\n##tious\nhomecoming\nbeyonce\nusher\nhenrietta\nprepares\nweeds\n##oman\nely\nplucked\n##pire\n##dable\nluxurious\n##aq\nartifact\npassword\npasture\njuno\nmaddy\nminsk\n##dder\n##ologies\n##rone\nassessments\nmartian\nroyalist\n1765\nexamines\n##mani\n##rge\nnino\n223\nparry\nscooped\nrelativity\n##eli\n##uting\n##cao\ncongregational\nnoisy\ntraverse\n##agawa\nstrikeouts\nnickelodeon\nobituary\ntransylvania\nbinds\ndepictions\npolk\ntrolley\n##yed\n##lard\nbreeders\n##under\ndryly\nhokkaido\n1762\nstrengths\nstacks\nbonaparte\nconnectivity\nneared\nprostitutes\nstamped\nanaheim\ngutierrez\nsinai\n##zzling\nbram\nfresno\nmadhya\n##86\nproton\n##lena\n##llum\n##phon\nreelected\nwanda\n##anus\n##lb\nample\ndistinguishing\n##yler\ngrasping\nsermons\ntomato\nbland\nstimulation\navenues\n##eux\nspreads\nscarlett\nfern\npentagon\nassert\nbaird\nchesapeake\nir\ncalmed\ndistortion\nfatalities\n##olis\ncorrectional\npricing\n##astic\n##gina\nprom\ndammit\nying\ncollaborate\n##chia\nwelterweight\n33rd\npointer\nsubstitution\nbonded\numpire\ncommunicating\nmultitude\npaddle\n##obe\nfederally\nintimacy\n##insky\nbetray\nssr\n##lett\n##lean\n##lves\n##therapy\nairbus\n##tery\nfunctioned\nud\nbearer\nbiomedical\nnetflix\n##hire\n##nca\ncondom\nbrink\nik\n##nical\nmacy\n##bet\nflap\ngma\nexperimented\njelly\nlavender\n##icles\n##ulia\nmunro\n##mian\n##tial\nrye\n##rle\n60th\ngigs\nhottest\nrotated\npredictions\nfuji\nbu\n##erence\n##omi\nbarangay\n##fulness\n##sas\nclocks\n##rwood\n##liness\ncereal\nroe\nwight\ndecker\nuttered\nbabu\nonion\nxml\nforcibly\n##df\npetra\nsarcasm\nhartley\npeeled\nstorytelling\n##42\n##xley\n##ysis\n##ffa\nfibre\nk
iel\nauditor\nfig\nharald\ngreenville\n##berries\ngeographically\nnell\nquartz\n##athic\ncemeteries\n##lr\ncrossings\nnah\nholloway\nreptiles\nchun\nsichuan\nsnowy\n660\ncorrections\n##ivo\nzheng\nambassadors\nblacksmith\nfielded\nfluids\nhardcover\nturnover\nmedications\nmelvin\nacademies\n##erton\nro\nroach\nabsorbing\nspaniards\ncolton\n##founded\noutsider\nespionage\nkelsey\n245\nedible\n##ulf\ndora\nestablishes\n##sham\n##tries\ncontracting\n##tania\ncinematic\ncostello\nnesting\n##uron\nconnolly\nduff\n##nology\nmma\n##mata\nfergus\nsexes\ngi\noptics\nspectator\nwoodstock\nbanning\n##hee\n##fle\ndifferentiate\noutfielder\nrefinery\n226\n312\ngerhard\nhorde\nlair\ndrastically\n##udi\nlandfall\n##cheng\nmotorsport\nodi\n##achi\npredominant\nquay\nskins\n##ental\nedna\nharshly\ncomplementary\nmurdering\n##aves\nwreckage\n##90\nono\noutstretched\nlennox\nmunitions\ngalen\nreconcile\n470\nscalp\nbicycles\ngillespie\nquestionable\nrosenberg\nguillermo\nhostel\njarvis\nkabul\nvolvo\nopium\nyd\n##twined\nabuses\ndecca\noutpost\n##cino\nsensible\nneutrality\n##64\nponce\nanchorage\natkins\nturrets\ninadvertently\ndisagree\nlibre\nvodka\nreassuring\nweighs\n##yal\nglide\njumper\nceilings\nrepertory\nouts\nstain\n##bial\nenvy\n##ucible\nsmashing\nheightened\npolicing\nhyun\nmixes\nlai\nprima\n##ples\nceleste\n##bina\nlucrative\nintervened\nkc\nmanually\n##rned\nstature\nstaffed\nbun\nbastards\nnairobi\npriced\n##auer\nthatcher\n##kia\ntripped\ncomune\n##ogan\n##pled\nbrasil\nincentives\nemanuel\nhereford\nmusica\n##kim\nbenedictine\nbiennale\n##lani\neureka\ngardiner\nrb\nknocks\nsha\n##ael\n##elled\n##onate\nefficacy\nventura\nmasonic\nsanford\nmaize\nleverage\n##feit\ncapacities\nsantana\n##aur\nnovelty\nvanilla\n##cter\n##tour\nbenin\n##oir\n##rain\nneptune\ndrafting\ntallinn\n##cable\nhumiliation\n##boarding\nschleswig\nfabian\nbernardo\nliturgy\nspectacle\nsweeney\npont\nroutledge\n##tment\ncosmos\nut\nhilt\nsleek\nuniversally\n##eville\n##gawa\ntyped\n##dry\nfavors
\nallegheny\nglaciers\n##rly\nrecalling\naziz\n##log\nparasite\nrequiem\nauf\n##berto\n##llin\nillumination\n##breaker\n##issa\nfestivities\nbows\ngovern\nvibe\nvp\n333\nsprawled\nlarson\npilgrim\nbwf\nleaping\n##rts\n##ssel\nalexei\ngreyhound\nhoarse\n##dler\n##oration\nseneca\n##cule\ngaping\n##ulously\n##pura\ncinnamon\n##gens\n##rricular\ncraven\nfantasies\nhoughton\nengined\nreigned\ndictator\nsupervising\n##oris\nbogota\ncommentaries\nunnatural\nfingernails\nspirituality\ntighten\n##tm\ncanadiens\nprotesting\nintentional\ncheers\nsparta\n##ytic\n##iere\n##zine\nwiden\nbelgarath\ncontrollers\ndodd\niaaf\nnavarre\n##ication\ndefect\nsquire\nsteiner\nwhisky\n##mins\n560\ninevitably\ntome\n##gold\nchew\n##uid\n##lid\nelastic\n##aby\nstreaked\nalliances\njailed\nregal\n##ined\n##phy\nczechoslovak\nnarration\nabsently\n##uld\nbluegrass\nguangdong\nquran\ncriticizing\nhose\nhari\n##liest\n##owa\nskier\nstreaks\ndeploy\n##lom\nraft\nbose\ndialed\nhuff\n##eira\nhaifa\nsimplest\nbursting\nendings\nib\nsultanate\n##titled\nfranks\nwhitman\nensures\nsven\n##ggs\ncollaborators\nforster\norganising\nui\nbanished\nnapier\ninjustice\nteller\nlayered\nthump\n##otti\nroc\nbattleships\nevidenced\nfugitive\nsadie\nrobotics\n##roud\nequatorial\ngeologist\n##iza\nyielding\n##bron\n##sr\ninternationale\nmecca\n##diment\nsbs\nskyline\ntoad\nuploaded\nreflective\nundrafted\nlal\nleafs\nbayern\n##dai\nlakshmi\nshortlisted\n##stick\n##wicz\ncamouflage\ndonate\naf\nchristi\nlau\n##acio\ndisclosed\nnemesis\n1761\nassemble\nstraining\nnorthamptonshire\ntal\n##asi\nbernardino\npremature\nheidi\n42nd\ncoefficients\ngalactic\nreproduce\nbuzzed\nsensations\nzionist\nmonsieur\nmyrtle\n##eme\narchery\nstrangled\nmusically\nviewpoint\nantiquities\nbei\ntrailers\nseahawks\ncured\npee\npreferring\ntasmanian\nlange\nsul\n##mail\n##working\ncolder\noverland\nlucivar\nmassey\ngatherings\nhaitian\n##smith\ndisapproval\nflaws\n##cco\n##enbach\n1766\nnpr\n##icular\nboroughs\ncreole\nforums\ntechno\n1755\
ndent\nabdominal\nstreetcar\n##eson\n##stream\nprocurement\ngemini\npredictable\n##tya\nacheron\nchristoph\nfeeder\nfronts\nvendor\nbernhard\njammu\ntumors\nslang\n##uber\ngoaltender\ntwists\ncurving\nmanson\nvuelta\nmer\npeanut\nconfessions\npouch\nunpredictable\nallowance\ntheodor\nvascular\n##factory\nbala\nauthenticity\nmetabolic\ncoughing\nnanjing\n##cea\npembroke\n##bard\nsplendid\n36th\nff\nhourly\n##ahu\nelmer\nhandel\n##ivate\nawarding\nthrusting\ndl\nexperimentation\n##hesion\n##46\ncaressed\nentertained\nsteak\n##rangle\nbiologist\norphans\nbaroness\noyster\nstepfather\n##dridge\nmirage\nreefs\nspeeding\n##31\nbarons\n1764\n227\ninhabit\npreached\nrepealed\n##tral\nhonoring\nboogie\ncaptives\nadminister\njohanna\n##imate\ngel\nsuspiciously\n1767\nsobs\n##dington\nbackbone\nhayward\ngarry\n##folding\n##nesia\nmaxi\n##oof\n##ppe\nellison\ngalileo\n##stand\ncrimea\nfrenzy\namour\nbumper\nmatrices\nnatalia\nbaking\ngarth\npalestinians\n##grove\nsmack\nconveyed\nensembles\ngardening\n##manship\n##rup\n##stituting\n1640\nharvesting\ntopography\njing\nshifters\ndormitory\n##carriage\n##lston\nist\nskulls\n##stadt\ndolores\njewellery\nsarawak\n##wai\n##zier\nfences\nchristy\nconfinement\ntumbling\ncredibility\nfir\nstench\n##bria\n##plication\n##nged\n##sam\nvirtues\n##belt\nmarjorie\npba\n##eem\n##made\ncelebrates\nschooner\nagitated\nbarley\nfulfilling\nanthropologist\n##pro\nrestrict\nnovi\nregulating\n##nent\npadres\n##rani\n##hesive\nloyola\ntabitha\nmilky\nolson\nproprietor\ncrambidae\nguarantees\nintercollegiate\nljubljana\nhilda\n##sko\nignorant\nhooded\n##lts\nsardinia\n##lidae\n##vation\nfrontman\nprivileged\nwitchcraft\n##gp\njammed\nlaude\npoking\n##than\nbracket\namazement\nyunnan\n##erus\nmaharaja\nlinnaeus\n264\ncommissioning\nmilano\npeacefully\n##logies\nakira\nrani\nregulator\n##36\ngrasses\n##rance\nluzon\ncrows\ncompiler\ngretchen\nseaman\nedouard\ntab\nbuccaneers\nellington\nhamlets\nwhig\nsocialists\n##anto\ndirectorial\neaston\nmythological
\n##kr\n##vary\nrhineland\nsemantic\ntaut\ndune\ninventions\nsucceeds\n##iter\nreplication\nbranched\n##pired\njul\nprosecuted\nkangaroo\npenetrated\n##avian\nmiddlesbrough\ndoses\nbleak\nmadam\npredatory\nrelentless\n##vili\nreluctance\n##vir\nhailey\ncrore\nsilvery\n1759\nmonstrous\nswimmers\ntransmissions\nhawthorn\ninforming\n##eral\ntoilets\ncaracas\ncrouch\nkb\n##sett\n295\ncartel\nhadley\n##aling\nalexia\nyvonne\n##biology\ncinderella\neton\nsuperb\nblizzard\nstabbing\nindustrialist\nmaximus\n##gm\n##orus\ngroves\nmaud\nclade\noversized\ncomedic\n##bella\nrosen\nnomadic\nfulham\nmontane\nbeverages\ngalaxies\nredundant\nswarm\n##rot\n##folia\n##llis\nbuckinghamshire\nfen\nbearings\nbahadur\n##rom\ngilles\nphased\ndynamite\nfaber\nbenoit\nvip\n##ount\n##wd\nbooking\nfractured\ntailored\nanya\nspices\nwestwood\ncairns\nauditions\ninflammation\nsteamed\n##rocity\n##acion\n##urne\nskyla\nthereof\nwatford\ntorment\narchdeacon\ntransforms\nlulu\ndemeanor\nfucked\nserge\n##sor\nmckenna\nminas\nentertainer\n##icide\ncaress\noriginate\nresidue\n##sty\n1740\n##ilised\n##org\nbeech\n##wana\nsubsidies\n##ghton\nemptied\ngladstone\nru\nfirefighters\nvoodoo\n##rcle\nhet\nnightingale\ntamara\nedmond\ningredient\nweaknesses\nsilhouette\n285\ncompatibility\nwithdrawing\nhampson\n##mona\nanguish\ngiggling\n##mber\nbookstore\n##jiang\nsouthernmost\ntilting\n##vance\nbai\neconomical\nrf\nbriefcase\ndreadful\nhinted\nprojections\nshattering\ntotaling\n##rogate\nanalogue\nindicted\nperiodical\nfullback\n##dman\nhaynes\n##tenberg\n##ffs\n##ishment\n1745\nthirst\nstumble\npenang\nvigorous\n##ddling\n##kor\n##lium\noctave\n##ove\n##enstein\n##inen\n##ones\nsiberian\n##uti\ncbn\nrepeal\nswaying\n##vington\nkhalid\ntanaka\nunicorn\notago\nplastered\nlobe\nriddle\n##rella\nperch\n##ishing\ncroydon\nfiltered\ngraeme\ntripoli\n##ossa\ncrocodile\n##chers\nsufi\nmined\n##tung\ninferno\nlsu\n##phi\nswelled\nutilizes\n£2\ncale\nperiodicals\nstyx\nhike\ninformally\ncoop\nlund\n##tidae\nala\nhen
\nqui\ntransformations\ndisposed\nsheath\nchickens\n##cade\nfitzroy\nsas\nsilesia\nunacceptable\nodisha\n1650\nsabrina\npe\nspokane\nratios\nathena\nmassage\nshen\ndilemma\n##drum\n##riz\n##hul\ncorona\ndoubtful\nniall\n##pha\n##bino\nfines\ncite\nacknowledging\nbangor\nballard\nbathurst\n##resh\nhuron\nmustered\nalzheimer\ngarments\nkinase\ntyre\nwarship\n##cp\nflashback\npulmonary\nbraun\ncheat\nkamal\ncyclists\nconstructions\ngrenades\nndp\ntraveller\nexcuses\nstomped\nsignalling\ntrimmed\nfutsal\nmosques\nrelevance\n##wine\nwta\n##23\n##vah\n##lter\nhoc\n##riding\noptimistic\n##´s\ndeco\nsim\ninteracting\nrejecting\nmoniker\nwaterways\n##ieri\n##oku\nmayors\ngdansk\noutnumbered\npearls\n##ended\n##hampton\nfairs\ntotals\ndominating\n262\nnotions\nstairway\ncompiling\npursed\ncommodities\ngrease\nyeast\n##jong\ncarthage\ngriffiths\nresidual\namc\ncontraction\nlaird\nsapphire\n##marine\n##ivated\namalgamation\ndissolve\ninclination\nlyle\npackaged\naltitudes\nsuez\ncanons\ngraded\nlurched\nnarrowing\nboasts\nguise\nwed\nenrico\n##ovsky\nrower\nscarred\nbree\ncub\niberian\nprotagonists\nbargaining\nproposing\ntrainers\nvoyages\nvans\nfishes\n##aea\n##ivist\n##verance\nencryption\nartworks\nkazan\nsabre\ncleopatra\nhepburn\nrotting\nsupremacy\nmecklenburg\n##brate\nburrows\nhazards\noutgoing\nflair\norganizes\n##ctions\nscorpion\n##usions\nboo\n234\nchevalier\ndunedin\nslapping\n##34\nineligible\npensions\n##38\n##omic\nmanufactures\nemails\nbismarck\n238\nweakening\nblackish\nding\nmcgee\nquo\n##rling\nnorthernmost\nxx\nmanpower\ngreed\nsampson\nclicking\n##ange\n##horpe\n##inations\n##roving\ntorre\n##eptive\n##moral\nsymbolism\n38th\nasshole\nmeritorious\noutfits\nsplashed\nbiographies\nsprung\nastros\n##tale\n302\n737\nfilly\nraoul\nnw\ntokugawa\nlinden\nclubhouse\n##apa\ntracts\nromano\n##pio\nputin\ntags\n##note\nchained\ndickson\ngunshot\nmoe\ngunn\nrashid\n##tails\nzipper\n##bas\n##nea\ncontrasted\n##ply\n##udes\nplum\npharaoh\n##pile\naw\ncomedies\ningrid\n
sandwiches\nsubdivisions\n1100\nmariana\nnokia\nkamen\nhz\ndelaney\nveto\nherring\n##words\npossessive\noutlines\n##roup\nsiemens\nstairwell\nrc\ngallantry\nmessiah\npalais\nyells\n233\nzeppelin\n##dm\nbolivar\n##cede\nsmackdown\nmckinley\n##mora\n##yt\nmuted\ngeologic\nfinely\nunitary\navatar\nhamas\nmaynard\nrees\nbog\ncontrasting\n##rut\nliv\nchico\ndisposition\npixel\n##erate\nbecca\ndmitry\nyeshiva\nnarratives\n##lva\n##ulton\nmercenary\nsharpe\ntempered\nnavigate\nstealth\namassed\nkeynes\n##lini\nuntouched\n##rrie\nhavoc\nlithium\n##fighting\nabyss\ngraf\nsouthward\nwolverine\nballoons\nimplements\nngos\ntransitions\n##icum\nambushed\nconcacaf\ndormant\neconomists\n##dim\ncosting\ncsi\nrana\nuniversite\nboulders\nverity\n##llon\ncollin\nmellon\nmisses\ncypress\nfluorescent\nlifeless\nspence\n##ulla\ncrewe\nshepard\npak\nrevelations\n##م\njolly\ngibbons\npaw\n##dro\n##quel\nfreeing\n##test\nshack\nfries\npalatine\n##51\n##hiko\naccompaniment\ncruising\nrecycled\n##aver\nerwin\nsorting\nsynthesizers\ndyke\nrealities\nsg\nstrides\nenslaved\nwetland\n##ghan\ncompetence\ngunpowder\ngrassy\nmaroon\nreactors\nobjection\n##oms\ncarlson\ngearbox\nmacintosh\nradios\nshelton\n##sho\nclergyman\nprakash\n254\nmongols\ntrophies\noricon\n228\nstimuli\ntwenty20\ncantonese\ncortes\nmirrored\n##saurus\nbhp\ncristina\nmelancholy\n##lating\nenjoyable\nnuevo\n##wny\ndownfall\nschumacher\n##ind\nbanging\nlausanne\nrumbled\nparamilitary\nreflex\nax\namplitude\nmigratory\n##gall\n##ups\nmidi\nbarnard\nlastly\nsherry\n##hp\n##nall\nkeystone\n##kra\ncarleton\nslippery\n##53\ncoloring\nfoe\nsocket\notter\n##rgos\nmats\n##tose\nconsultants\nbafta\nbison\ntopping\n##km\n490\nprimal\nabandonment\ntransplant\natoll\nhideous\nmort\npained\nreproduced\ntae\nhowling\n##turn\nunlawful\nbillionaire\nhotter\npoised\nlansing\n##chang\ndinamo\nretro\nmessing\nnfc\ndomesday\n##mina\nblitz\ntimed\n##athing\n##kley\nascending\ngesturing\n##izations\nsignaled\ntis\nchinatown\nmermaid\nsavanna\njameson
\n##aint\ncatalina\n##pet\n##hers\ncochrane\ncy\nchatting\n##kus\nalerted\ncomputation\nmused\nnoelle\nmajestic\nmohawk\ncampo\noctagonal\n##sant\n##hend\n241\naspiring\n##mart\ncomprehend\niona\nparalyzed\nshimmering\nswindon\nrhone\n##eley\nreputed\nconfigurations\npitchfork\nagitation\nfrancais\ngillian\nlipstick\n##ilo\noutsiders\npontifical\nresisting\nbitterness\nsewer\nrockies\n##edd\n##ucher\nmisleading\n1756\nexiting\ngalloway\n##nging\nrisked\n##heart\n246\ncommemoration\nschultz\n##rka\nintegrating\n##rsa\nposes\nshrieked\n##weiler\nguineas\ngladys\njerking\nowls\ngoldsmith\nnightly\npenetrating\n##unced\nlia\n##33\nignited\nbetsy\n##aring\n##thorpe\nfollower\nvigorously\n##rave\ncoded\nkiran\nknit\nzoology\ntbilisi\n##28\n##bered\nrepository\ngovt\ndeciduous\ndino\ngrowling\n##bba\nenhancement\nunleashed\nchanting\npussy\nbiochemistry\n##eric\nkettle\nrepression\ntoxicity\nnrhp\n##arth\n##kko\n##bush\nernesto\ncommended\noutspoken\n242\nmca\nparchment\nsms\nkristen\n##aton\nbisexual\nraked\nglamour\nnavajo\na2\nconditioned\nshowcased\n##hma\nspacious\nyouthful\n##esa\nusl\nappliances\njunta\nbrest\nlayne\nconglomerate\nenchanted\nchao\nloosened\npicasso\ncirculating\ninspect\nmontevideo\n##centric\n##kti\npiazza\nspurred\n##aith\nbari\nfreedoms\npoultry\nstamford\nlieu\n##ect\nindigo\nsarcastic\nbahia\nstump\nattach\ndvds\nfrankenstein\nlille\napprox\nscriptures\npollen\n##script\nnmi\noverseen\n##ivism\ntides\nproponent\nnewmarket\ninherit\nmilling\n##erland\ncentralized\n##rou\ndistributors\ncredentials\ndrawers\nabbreviation\n##lco\n##xon\ndowning\nuncomfortably\nripe\n##oes\nerase\nfranchises\n##ever\npopulace\n##bery\n##khar\ndecomposition\npleas\n##tet\ndaryl\nsabah\n##stle\n##wide\nfearless\ngenie\nlesions\nannette\n##ogist\noboe\nappendix\nnair\ndripped\npetitioned\nmaclean\nmosquito\nparrot\nrpg\nhampered\n1648\noperatic\nreservoirs\n##tham\nirrelevant\njolt\nsummarized\n##fp\nmedallion\n##taff\n##−\nclawed\nharlow\nnarrower\ngoddard\nmarcia\nbo
died\nfremont\nsuarez\naltering\ntempest\nmussolini\nporn\n##isms\nsweetly\noversees\nwalkers\nsolitude\ngrimly\nshrines\nhk\nich\nsupervisors\nhostess\ndietrich\nlegitimacy\nbrushes\nexpressive\n##yp\ndissipated\n##rse\nlocalized\nsystemic\n##nikov\ngettysburg\n##js\n##uaries\ndialogues\nmuttering\n251\nhousekeeper\nsicilian\ndiscouraged\n##frey\nbeamed\nkaladin\nhalftime\nkidnap\n##amo\n##llet\n1754\nsynonymous\ndepleted\ninstituto\ninsulin\nreprised\n##opsis\nclashed\n##ctric\ninterrupting\nradcliffe\ninsisting\nmedici\n1715\nejected\nplayfully\nturbulent\n##47\nstarvation\n##rini\nshipment\nrebellious\npetersen\nverification\nmerits\n##rified\ncakes\n##charged\n1757\nmilford\nshortages\nspying\nfidelity\n##aker\nemitted\nstorylines\nharvested\nseismic\n##iform\ncheung\nkilda\ntheoretically\nbarbie\nlynx\n##rgy\n##tius\ngoblin\nmata\npoisonous\n##nburg\nreactive\nresidues\nobedience\n##евич\nconjecture\n##rac\n401\nhating\nsixties\nkicker\nmoaning\nmotown\n##bha\nemancipation\nneoclassical\n##hering\nconsoles\nebert\nprofessorship\n##tures\nsustaining\nassaults\nobeyed\naffluent\nincurred\ntornadoes\n##eber\n##zow\nemphasizing\nhighlanders\ncheated\nhelmets\n##ctus\ninternship\nterence\nbony\nexecutions\nlegislators\nberries\npeninsular\ntinged\n##aco\n1689\namplifier\ncorvette\nribbons\nlavish\npennant\n##lander\nworthless\n##chfield\n##forms\nmariano\npyrenees\nexpenditures\n##icides\nchesterfield\nmandir\ntailor\n39th\nsergey\nnestled\nwilled\naristocracy\ndevotees\ngoodnight\nraaf\nrumored\nweaponry\nremy\nappropriations\nharcourt\nburr\nriaa\n##lence\nlimitation\nunnoticed\nguo\nsoaking\nswamps\n##tica\ncollapsing\ntatiana\ndescriptive\nbrigham\npsalm\n##chment\nmaddox\n##lization\npatti\ncaliph\n##aja\nakron\ninjuring\nserra\n##ganj\nbasins\n##sari\nastonished\nlauncher\n##church\nhilary\nwilkins\nsewing\n##sf\nstinging\n##fia\n##ncia\nunderwood\nstartup\n##ition\ncompilations\nvibrations\nembankment\njurist\n##nity\nbard\njuventus\ngroundwater\nkern\npalac
es\nhelium\nboca\ncramped\nmarissa\nsoto\n##worm\njae\nprincely\n##ggy\nfaso\nbazaar\nwarmly\n##voking\n229\npairing\n##lite\n##grate\n##nets\nwien\nfreaked\nulysses\nrebirth\n##alia\n##rent\nmummy\nguzman\njimenez\nstilled\n##nitz\ntrajectory\ntha\nwoken\narchival\nprofessions\n##pts\n##pta\nhilly\nshadowy\nshrink\n##bolt\nnorwood\nglued\nmigrate\nstereotypes\ndevoid\n##pheus\n625\nevacuate\nhorrors\ninfancy\ngotham\nknowles\noptic\ndownloaded\nsachs\nkingsley\nparramatta\ndarryl\nmor\n##onale\nshady\ncommence\nconfesses\nkan\n##meter\n##placed\nmarlborough\nroundabout\nregents\nfrigates\nio\n##imating\ngothenburg\nrevoked\ncarvings\nclockwise\nconvertible\nintruder\n##sche\nbanged\n##ogo\nvicky\nbourgeois\n##mony\ndupont\nfooting\n##gum\npd\n##real\nbuckle\nyun\npenthouse\nsane\n720\nserviced\nstakeholders\nneumann\nbb\n##eers\ncomb\n##gam\ncatchment\npinning\nrallies\ntyping\n##elles\nforefront\nfreiburg\nsweetie\ngiacomo\nwidowed\ngoodwill\nworshipped\naspirations\nmidday\n##vat\nfishery\n##trick\nbournemouth\nturk\n243\nhearth\nethanol\nguadalajara\nmurmurs\nsl\n##uge\nafforded\nscripted\n##hta\nwah\n##jn\ncoroner\ntranslucent\n252\nmemorials\npuck\nprogresses\nclumsy\n##race\n315\ncandace\nrecounted\n##27\n##slin\n##uve\nfiltering\n##mac\nhowl\nstrata\nheron\nleveled\n##ays\ndubious\n##oja\n##т\n##wheel\ncitations\nexhibiting\n##laya\n##mics\n##pods\nturkic\n##lberg\ninjunction\n##ennial\n##mit\nantibodies\n##44\norganise\n##rigues\ncardiovascular\ncushion\ninverness\n##zquez\ndia\ncocoa\nsibling\n##tman\n##roid\nexpanse\nfeasible\ntunisian\nalgiers\n##relli\nrus\nbloomberg\ndso\nwestphalia\nbro\ntacoma\n281\ndownloads\n##ours\nkonrad\nduran\n##hdi\ncontinuum\njett\ncompares\nlegislator\nsecession\n##nable\n##gues\n##zuka\ntranslating\nreacher\n##gley\n##ła\naleppo\n##agi\ntc\norchards\ntrapping\nlinguist\nversatile\ndrumming\npostage\ncalhoun\nsuperiors\n##mx\nbarefoot\nleary\n##cis\nignacio\nalfa\nkaplan\n##rogen\nbratislava\nmori\n##vot\ndisturb\nhaas\n313\
ncartridges\ngilmore\nradiated\nsalford\ntunic\nhades\n##ulsive\narcheological\ndelilah\nmagistrates\nauditioned\nbrewster\ncharters\nempowerment\nblogs\ncappella\ndynasties\niroquois\nwhipping\n##krishna\nraceway\ntruths\nmyra\nweaken\njudah\nmcgregor\n##horse\nmic\nrefueling\n37th\nburnley\nbosses\nmarkus\npremio\nquery\n##gga\ndunbar\n##economic\ndarkest\nlyndon\nsealing\ncommendation\nreappeared\n##mun\naddicted\nezio\nslaughtered\nsatisfactory\nshuffle\n##eves\n##thic\n##uj\nfortification\nwarrington\n##otto\nresurrected\nfargo\nmane\n##utable\n##lei\n##space\nforeword\nox\n##aris\n##vern\nabrams\nhua\n##mento\nsakura\n##alo\nuv\nsentimental\n##skaya\nmidfield\n##eses\nsturdy\nscrolls\nmacleod\n##kyu\nentropy\n##lance\nmitochondrial\ncicero\nexcelled\nthinner\nconvoys\nperceive\n##oslav\n##urable\nsystematically\ngrind\nburkina\n287\n##tagram\nops\n##aman\nguantanamo\n##cloth\n##tite\nforcefully\nwavy\n##jou\npointless\n##linger\n##tze\nlayton\nportico\nsuperficial\nclerical\noutlaws\n##hism\nburials\nmuir\n##inn\ncreditors\nhauling\nrattle\n##leg\ncalais\nmonde\narchers\nreclaimed\ndwell\nwexford\nhellenic\nfalsely\nremorse\n##tek\ndough\nfurnishings\n##uttered\ngabon\nneurological\nnovice\n##igraphy\ncontemplated\npulpit\nnightstand\nsaratoga\n##istan\ndocumenting\npulsing\ntaluk\n##firmed\nbusted\nmarital\n##rien\ndisagreements\nwasps\n##yes\nhodge\nmcdonnell\nmimic\nfran\npendant\ndhabi\nmusa\n##nington\ncongratulations\nargent\ndarrell\nconcussion\nlosers\nregrets\nthessaloniki\nreversal\ndonaldson\nhardwood\nthence\nachilles\nritter\n##eran\ndemonic\njurgen\nprophets\ngoethe\neki\nclassmate\nbuff\n##cking\nyank\nirrational\n##inging\nperished\nseductive\nqur\nsourced\n##crat\n##typic\nmustard\nravine\nbarre\nhorizontally\ncharacterization\nphylogenetic\nboise\n##dit\n##runner\n##tower\nbrutally\nintercourse\nseduce\n##bbing\nfay\nferris\nogden\namar\nnik\nunarmed\n##inator\nevaluating\nkyrgyzstan\nsweetness\n##lford\n##oki\nmccormick\nmeiji\nnotoriety\nst
imulate\ndisrupt\nfiguring\ninstructional\nmcgrath\n##zoo\ngroundbreaking\n##lto\nflinch\nkhorasan\nagrarian\nbengals\nmixer\nradiating\n##sov\ningram\npitchers\nnad\ntariff\n##cript\ntata\n##codes\n##emi\n##ungen\nappellate\nlehigh\n##bled\n##giri\nbrawl\nduct\ntexans\n##ciation\n##ropolis\nskipper\nspeculative\nvomit\ndoctrines\nstresses\n253\ndavy\ngraders\nwhitehead\njozef\ntimely\ncumulative\nharyana\npaints\nappropriately\nboon\ncactus\n##ales\n##pid\ndow\nlegions\n##pit\nperceptions\n1730\npicturesque\n##yse\nperiphery\nrune\nwr\n##aha\nceltics\nsentencing\nwhoa\n##erin\nconfirms\nvariance\n425\nmoines\nmathews\nspade\nrave\nm1\nfronted\nfx\nblending\nalleging\nreared\n##gl\n237\n##paper\ngrassroots\neroded\n##free\n##physical\ndirects\nordeal\n##sław\naccelerate\nhacker\nrooftop\n##inia\nlev\nbuys\ncebu\ndevote\n##lce\nspecialising\n##ulsion\nchoreographed\nrepetition\nwarehouses\n##ryl\npaisley\ntuscany\nanalogy\nsorcerer\nhash\nhuts\nshards\ndescends\nexclude\nnix\nchaplin\ngaga\nito\nvane\n##drich\ncauseway\nmisconduct\nlimo\norchestrated\nglands\njana\n##kot\nu2\n##mple\n##sons\nbranching\ncontrasts\nscoop\nlonged\n##virus\nchattanooga\n##75\nsyrup\ncornerstone\n##tized\n##mind\n##iaceae\ncareless\nprecedence\nfrescoes\n##uet\nchilled\nconsult\nmodelled\nsnatch\npeat\n##thermal\ncaucasian\nhumane\nrelaxation\nspins\ntemperance\n##lbert\noccupations\nlambda\nhybrids\nmoons\nmp3\n##oese\n247\nrolf\nsocietal\nyerevan\nness\n##ssler\nbefriended\nmechanized\nnominate\ntrough\nboasted\ncues\nseater\n##hom\nbends\n##tangle\nconductors\nemptiness\n##lmer\neurasian\nadriatic\ntian\n##cie\nanxiously\nlark\npropellers\nchichester\njock\nev\n2a\n##holding\ncredible\nrecounts\ntori\nloyalist\nabduction\n##hoot\n##redo\nnepali\n##mite\nventral\ntempting\n##ango\n##crats\nsteered\n##wice\njavelin\ndipping\nlaborers\nprentice\nlooming\ntitanium\n##ː\nbadges\nemir\ntensor\n##ntation\negyptians\nrash\ndenies\nhawthorne\nlombard\nshowers\nwehrmacht\ndietary\ntrojan\n##reus
\nwelles\nexecuting\nhorseshoe\nlifeboat\n##lak\nelsa\ninfirmary\nnearing\nroberta\nboyer\nmutter\ntrillion\njoanne\n##fine\n##oked\nsinks\nvortex\nuruguayan\nclasp\nsirius\n##block\naccelerator\nprohibit\nsunken\nbyu\nchronological\ndiplomats\nochreous\n510\nsymmetrical\n1644\nmaia\n##tology\nsalts\nreigns\natrocities\n##ия\nhess\nbared\nissn\n##vyn\ncater\nsaturated\n##cycle\n##isse\nsable\nvoyager\ndyer\nyusuf\n##inge\nfountains\nwolff\n##39\n##nni\nengraving\nrollins\natheist\nominous\n##ault\nherr\nchariot\nmartina\nstrung\n##fell\n##farlane\nhorrific\nsahib\ngazes\nsaetan\nerased\nptolemy\n##olic\nflushing\nlauderdale\nanalytic\n##ices\n530\nnavarro\nbeak\ngorilla\nherrera\nbroom\nguadalupe\nraiding\nsykes\n311\nbsc\ndeliveries\n1720\ninvasions\ncarmichael\ntajikistan\nthematic\necumenical\nsentiments\nonstage\n##rians\n##brand\n##sume\ncatastrophic\nflanks\nmolten\n##arns\nwaller\naimee\nterminating\n##icing\nalternately\n##oche\nnehru\nprinters\noutraged\n##eving\nempires\ntemplate\nbanners\nrepetitive\nza\n##oise\nvegetarian\n##tell\nguiana\nopt\ncavendish\nlucknow\nsynthesized\n##hani\n##mada\nfinalized\n##ctable\nfictitious\nmayoral\nunreliable\n##enham\nembracing\npeppers\nrbis\n##chio\n##neo\ninhibition\nslashed\ntogo\norderly\nembroidered\nsafari\nsalty\n236\nbarron\nbenito\ntotaled\n##dak\npubs\nsimulated\ncaden\ndevin\ntolkien\nmomma\nwelding\nsesame\n##ept\ngottingen\nhardness\n630\nshaman\ntemeraire\n620\nadequately\npediatric\n##kit\nck\nassertion\nradicals\ncomposure\ncadence\nseafood\nbeaufort\nlazarus\nmani\nwarily\ncunning\nkurdistan\n249\ncantata\n##kir\nares\n##41\n##clusive\nnape\ntownland\ngeared\ninsulted\nflutter\nboating\nviolate\ndraper\ndumping\nmalmo\n##hh\n##romatic\nfirearm\nalta\nbono\nobscured\n##clave\nexceeds\npanorama\nunbelievable\n##train\npreschool\n##essed\ndisconnected\ninstalling\nrescuing\nsecretaries\naccessibility\n##castle\n##drive\n##ifice\n##film\nbouts\nslug\nwaterway\nmindanao\n##buro\n##ratic\nhalves\n##ل\ncalmi
ng\nliter\nmaternity\nadorable\nbragg\nelectrification\nmcc\n##dote\nroxy\nschizophrenia\n##body\nmunoz\nkaye\nwhaling\n239\nmil\ntingling\ntolerant\n##ago\nunconventional\nvolcanoes\n##finder\ndeportivo\n##llie\nrobson\nkaufman\nneuroscience\nwai\ndeportation\nmasovian\nscraping\nconverse\n##bh\nhacking\nbulge\n##oun\nadministratively\nyao\n580\namp\nmammoth\nbooster\nclaremont\nhooper\nnomenclature\npursuits\nmclaughlin\nmelinda\n##sul\ncatfish\nbarclay\nsubstrates\ntaxa\nzee\noriginals\nkimberly\npackets\npadma\n##ality\nborrowing\nostensibly\nsolvent\n##bri\n##genesis\n##mist\nlukas\nshreveport\nveracruz\n##ь\n##lou\n##wives\ncheney\ntt\nanatolia\nhobbs\n##zyn\ncyclic\nradiant\nalistair\ngreenish\nsiena\ndat\nindependents\n##bation\nconform\npieter\nhyper\napplicant\nbradshaw\nspores\ntelangana\nvinci\ninexpensive\nnuclei\n322\njang\nnme\nsoho\nspd\n##ign\ncradled\nreceptionist\npow\n##43\n##rika\nfascism\n##ifer\nexperimenting\n##ading\n##iec\n##region\n345\njocelyn\nmaris\nstair\nnocturnal\ntoro\nconstabulary\nelgin\n##kker\nmsc\n##giving\n##schen\n##rase\ndoherty\ndoping\nsarcastically\nbatter\nmaneuvers\n##cano\n##apple\n##gai\n##git\nintrinsic\n##nst\n##stor\n1753\nshowtime\ncafes\ngasps\nlviv\nushered\n##thed\nfours\nrestart\nastonishment\ntransmitting\nflyer\nshrugs\n##sau\nintriguing\ncones\ndictated\nmushrooms\nmedial\n##kovsky\n##elman\nescorting\ngaped\n##26\ngodfather\n##door\n##sell\ndjs\nrecaptured\ntimetable\nvila\n1710\n3a\naerodrome\nmortals\nscientology\n##orne\nangelina\nmag\nconvection\nunpaid\ninsertion\nintermittent\nlego\n##nated\nendeavor\nkota\npereira\n##lz\n304\nbwv\nglamorgan\ninsults\nagatha\nfey\n##cend\nfleetwood\nmahogany\nprotruding\nsteamship\nzeta\n##arty\nmcguire\nsuspense\n##sphere\nadvising\nurges\n##wala\nhurriedly\nmeteor\ngilded\ninline\narroyo\nstalker\n##oge\nexcitedly\nrevered\n##cure\nearle\nintroductory\n##break\n##ilde\nmutants\npuff\npulses\nreinforcement\n##haling\ncurses\nlizards\nstalk\ncorrelated\n##fixed\nfall
out\nmacquarie\n##unas\nbearded\ndenton\nheaving\n802\n##ocation\nwinery\nassign\ndortmund\n##lkirk\neverest\ninvariant\ncharismatic\nsusie\n##elling\nbled\nlesley\ntelegram\nsumner\nbk\n##ogen\n##к\nwilcox\nneedy\ncolbert\nduval\n##iferous\n##mbled\nallotted\nattends\nimperative\n##hita\nreplacements\nhawker\n##inda\ninsurgency\n##zee\n##eke\ncasts\n##yla\n680\nives\ntransitioned\n##pack\n##powering\nauthoritative\nbaylor\nflex\ncringed\nplaintiffs\nwoodrow\n##skie\ndrastic\nape\naroma\nunfolded\ncommotion\nnt\npreoccupied\ntheta\nroutines\nlasers\nprivatization\nwand\ndomino\nek\nclenching\nnsa\nstrategically\nshowered\nbile\nhandkerchief\npere\nstoring\nchristophe\ninsulting\n316\nnakamura\nromani\nasiatic\nmagdalena\npalma\ncruises\nstripping\n405\nkonstantin\nsoaring\n##berman\ncolloquially\nforerunner\nhavilland\nincarcerated\nparasites\nsincerity\n##utus\ndisks\nplank\nsaigon\n##ining\ncorbin\nhomo\nornaments\npowerhouse\n##tlement\nchong\nfastened\nfeasibility\nidf\nmorphological\nusable\n##nish\n##zuki\naqueduct\njaguars\nkeepers\n##flies\naleksandr\nfaust\nassigns\newing\nbacterium\nhurled\ntricky\nhungarians\nintegers\nwallis\n321\nyamaha\n##isha\nhushed\noblivion\naviator\nevangelist\nfriars\n##eller\nmonograph\node\n##nary\nairplanes\nlabourers\ncharms\n##nee\n1661\nhagen\ntnt\nrudder\nfiesta\ntranscript\ndorothea\nska\ninhibitor\nmaccabi\nretorted\nraining\nencompassed\nclauses\nmenacing\n1642\nlineman\n##gist\nvamps\n##ape\n##dick\ngloom\n##rera\ndealings\neasing\nseekers\n##nut\n##pment\nhelens\nunmanned\n##anu\n##isson\nbasics\n##amy\n##ckman\nadjustments\n1688\nbrutality\nhorne\n##zell\nsui\n##55\n##mable\naggregator\n##thal\nrhino\n##drick\n##vira\ncounters\nzoom\n##01\n##rting\nmn\nmontenegrin\npackard\n##unciation\n##♭\n##kki\nreclaim\nscholastic\nthugs\npulsed\n##icia\nsyriac\nquan\nsaddam\nbanda\nkobe\nblaming\nbuddies\ndissent\n##lusion\n##usia\ncorbett\njaya\ndelle\nerratic\nlexie\n##hesis\n435\namiga\nhermes\n##pressing\n##leen\nchapels\ngo
spels\njamal\n##uating\ncompute\nrevolving\nwarp\n##sso\n##thes\narmory\n##eras\n##gol\nantrim\nloki\n##kow\n##asian\n##good\n##zano\nbraid\nhandwriting\nsubdistrict\nfunky\npantheon\n##iculate\nconcurrency\nestimation\nimproper\njuliana\n##his\nnewcomers\njohnstone\nstaten\ncommunicated\n##oco\n##alle\nsausage\nstormy\n##stered\n##tters\nsuperfamily\n##grade\nacidic\ncollateral\ntabloid\n##oped\n##rza\nbladder\nausten\n##ellant\nmcgraw\n##hay\nhannibal\nmein\naquino\nlucifer\nwo\nbadger\nboar\ncher\nchristensen\ngreenberg\ninterruption\n##kken\njem\n244\nmocked\nbottoms\ncambridgeshire\n##lide\nsprawling\n##bbly\neastwood\nghent\nsynth\n##buck\nadvisers\n##bah\nnominally\nhapoel\nqu\ndaggers\nestranged\nfabricated\ntowels\nvinnie\nwcw\nmisunderstanding\nanglia\nnothin\nunmistakable\n##dust\n##lova\nchilly\nmarquette\ntruss\n##edge\n##erine\nreece\n##lty\n##chemist\n##connected\n272\n308\n41st\nbash\nraion\nwaterfalls\n##ump\n##main\nlabyrinth\nqueue\ntheorist\n##istle\nbharatiya\nflexed\nsoundtracks\nrooney\nleftist\npatrolling\nwharton\nplainly\nalleviate\neastman\nschuster\ntopographic\nengages\nimmensely\nunbearable\nfairchild\n1620\ndona\nlurking\nparisian\noliveira\nia\nindictment\nhahn\nbangladeshi\n##aster\nvivo\n##uming\n##ential\nantonia\nexpects\nindoors\nkildare\nharlan\n##logue\n##ogenic\n##sities\nforgiven\n##wat\nchildish\ntavi\n##mide\n##orra\nplausible\ngrimm\nsuccessively\nscooted\n##bola\n##dget\n##rith\nspartans\nemery\nflatly\nazure\nepilogue\n##wark\nflourish\n##iny\n##tracted\n##overs\n##oshi\nbestseller\ndistressed\nreceipt\nspitting\nhermit\ntopological\n##cot\ndrilled\nsubunit\nfrancs\n##layer\neel\n##fk\n##itas\noctopus\nfootprint\npetitions\nufo\n##say\n##foil\ninterfering\nleaking\npalo\n##metry\nthistle\nvaliant\n##pic\nnarayan\nmcpherson\n##fast\ngonzales\n##ym\n##enne\ndustin\nnovgorod\nsolos\n##zman\ndoin\n##raph\n##patient\n##meyer\nsoluble\nashland\ncuffs\ncarole\npendleton\nwhistling\nvassal\n##river\ndeviation\nrevisited\nconstit
uents\nrallied\nrotate\nloomed\n##eil\n##nting\namateurs\naugsburg\nauschwitz\ncrowns\nskeletons\n##cona\nbonnet\n257\ndummy\nglobalization\nsimeon\nsleeper\nmandal\ndifferentiated\n##crow\n##mare\nmilne\nbundled\nexasperated\ntalmud\nowes\nsegregated\n##feng\n##uary\ndentist\npiracy\nprops\n##rang\ndevlin\n##torium\nmalicious\npaws\n##laid\ndependency\n##ergy\n##fers\n##enna\n258\npistons\nrourke\njed\ngrammatical\ntres\nmaha\nwig\n512\nghostly\njayne\n##achal\n##creen\n##ilis\n##lins\n##rence\ndesignate\n##with\narrogance\ncambodian\nclones\nshowdown\nthrottle\ntwain\n##ception\nlobes\nmetz\nnagoya\n335\nbraking\n##furt\n385\nroaming\n##minster\namin\ncrippled\n##37\n##llary\nindifferent\nhoffmann\nidols\nintimidating\n1751\n261\ninfluenza\nmemo\nonions\n1748\nbandage\nconsciously\n##landa\n##rage\nclandestine\nobserves\nswiped\ntangle\n##ener\n##jected\n##trum\n##bill\n##lta\nhugs\ncongresses\njosiah\nspirited\n##dek\nhumanist\nmanagerial\nfilmmaking\ninmate\nrhymes\ndebuting\ngrimsby\nur\n##laze\nduplicate\nvigor\n##tf\nrepublished\nbolshevik\nrefurbishment\nantibiotics\nmartini\nmethane\nnewscasts\nroyale\nhorizons\nlevant\niain\nvisas\n##ischen\npaler\n##around\nmanifestation\nsnuck\nalf\nchop\nfutile\npedestal\nrehab\n##kat\nbmg\nkerman\nres\nfairbanks\njarrett\nabstraction\nsaharan\n##zek\n1746\nprocedural\nclearer\nkincaid\nsash\nluciano\n##ffey\ncrunch\nhelmut\n##vara\nrevolutionaries\n##tute\ncreamy\nleach\n##mmon\n1747\npermitting\nnes\nplight\nwendell\n##lese\ncontra\nts\nclancy\nipa\nmach\nstaples\nautopsy\ndisturbances\nnueva\nkarin\npontiac\n##uding\nproxy\nvenerable\nhaunt\nleto\nbergman\nexpands\n##helm\nwal\n##pipe\ncanning\nceline\ncords\nobesity\n##enary\nintrusion\nplanner\n##phate\nreasoned\nsequencing\n307\nharrow\n##chon\n##dora\nmarred\nmcintyre\nrepay\ntarzan\ndarting\n248\nharrisburg\nmargarita\nrepulsed\n##hur\n##lding\nbelinda\nhamburger\nnovo\ncompliant\nrunways\nbingham\nregistrar\nskyscraper\nic\ncuthbert\nimprovisation\nlivelihood\n
##corp\n##elial\nadmiring\n##dened\nsporadic\nbeliever\ncasablanca\npopcorn\n##29\nasha\nshovel\n##bek\n##dice\ncoiled\ntangible\n##dez\ncasper\nelsie\nresin\ntenderness\nrectory\n##ivision\navail\nsonar\n##mori\nboutique\n##dier\nguerre\nbathed\nupbringing\nvaulted\nsandals\nblessings\n##naut\n##utnant\n1680\n306\nfoxes\npia\ncorrosion\nhesitantly\nconfederates\ncrystalline\nfootprints\nshapiro\ntirana\nvalentin\ndrones\n45th\nmicroscope\nshipments\ntexted\ninquisition\nwry\nguernsey\nunauthorized\nresigning\n760\nripple\nschubert\nstu\nreassure\nfelony\n##ardo\nbrittle\nkoreans\n##havan\n##ives\ndun\nimplicit\ntyres\n##aldi\n##lth\nmagnolia\n##ehan\n##puri\n##poulos\naggressively\nfei\ngr\nfamiliarity\n##poo\nindicative\n##trust\nfundamentally\njimmie\noverrun\n395\nanchors\nmoans\n##opus\nbritannia\narmagh\n##ggle\npurposely\nseizing\n##vao\nbewildered\nmundane\navoidance\ncosmopolitan\ngeometridae\nquartermaster\ncaf\n415\nchatter\nengulfed\ngleam\npurge\n##icate\njuliette\njurisprudence\nguerra\nrevisions\n##bn\ncasimir\nbrew\n##jm\n1749\nclapton\ncloudy\nconde\nhermitage\n278\nsimulations\ntorches\nvincenzo\nmatteo\n##rill\nhidalgo\nbooming\nwestbound\naccomplishment\ntentacles\nunaffected\n##sius\nannabelle\nflopped\nsloping\n##litz\ndreamer\ninterceptor\nvu\n##loh\nconsecration\ncopying\nmessaging\nbreaker\nclimates\nhospitalized\n1752\ntorino\nafternoons\nwinfield\nwitnessing\n##teacher\nbreakers\nchoirs\nsawmill\ncoldly\n##ege\nsipping\nhaste\nuninhabited\nconical\nbibliography\npamphlets\nsevern\nedict\n##oca\ndeux\nillnesses\ngrips\n##pl\nrehearsals\nsis\nthinkers\ntame\n##keepers\n1690\nacacia\nreformer\n##osed\n##rys\nshuffling\n##iring\n##shima\neastbound\nionic\nrhea\nflees\nlittered\n##oum\nrocker\nvomiting\ngroaning\nchamp\noverwhelmingly\ncivilizations\npaces\nsloop\nadoptive\n##tish\nskaters\n##vres\naiding\nmango\n##joy\nnikola\nshriek\n##ignon\npharmaceuticals\n##mg\ntuna\ncalvert\ngustavo\nstocked\nyearbook\n##urai\n##mana\ncomputed\nsubsp\nri
ff\nhanoi\nkelvin\nhamid\nmoors\npastures\nsummons\njihad\nnectar\n##ctors\nbayou\nuntitled\npleasing\nvastly\nrepublics\nintellect\n##η\n##ulio\n##tou\ncrumbling\nstylistic\nsb\n##ی\nconsolation\nfrequented\nh₂o\nwalden\nwidows\n##iens\n404\n##ignment\nchunks\nimproves\n288\ngrit\nrecited\n##dev\nsnarl\nsociological\n##arte\n##gul\ninquired\n##held\nbruise\nclube\nconsultancy\nhomogeneous\nhornets\nmultiplication\npasta\nprick\nsavior\n##grin\n##kou\n##phile\nyoon\n##gara\ngrimes\nvanishing\ncheering\nreacting\nbn\ndistillery\n##quisite\n##vity\ncoe\ndockyard\nmassif\n##jord\nescorts\nvoss\n##valent\nbyte\nchopped\nhawke\nillusions\nworkings\nfloats\n##koto\n##vac\nkv\nannapolis\nmadden\n##onus\nalvaro\nnoctuidae\n##cum\n##scopic\navenge\nsteamboat\nforte\nillustrates\nerika\n##trip\n570\ndew\nnationalities\nbran\nmanifested\nthirsty\ndiversified\nmuscled\nreborn\n##standing\narson\n##lessness\n##dran\n##logram\n##boys\n##kushima\n##vious\nwilloughby\n##phobia\n286\nalsace\ndashboard\nyuki\n##chai\ngranville\nmyspace\npublicized\ntricked\n##gang\nadjective\n##ater\nrelic\nreorganisation\nenthusiastically\nindications\nsaxe\n##lassified\nconsolidate\niec\npadua\nhelplessly\nramps\nrenaming\nregulars\npedestrians\naccents\nconvicts\ninaccurate\nlowers\nmana\n##pati\nbarrie\nbjp\noutta\nsomeplace\nberwick\nflanking\ninvoked\nmarrow\nsparsely\nexcerpts\nclothed\nrei\n##ginal\nwept\n##straße\n##vish\nalexa\nexcel\n##ptive\nmembranes\naquitaine\ncreeks\ncutler\nsheppard\nimplementations\nns\n##dur\nfragrance\nbudge\nconcordia\nmagnesium\nmarcelo\n##antes\ngladly\nvibrating\n##rral\n##ggles\nmontrose\n##omba\nlew\nseamus\n1630\ncocky\n##ament\n##uen\nbjorn\n##rrick\nfielder\nfluttering\n##lase\nmethyl\nkimberley\nmcdowell\nreductions\nbarbed\n##jic\n##tonic\naeronautical\ncondensed\ndistracting\n##promising\nhuffed\n##cala\n##sle\nclaudius\ninvincible\nmissy\npious\nbalthazar\nci\n##lang\nbutte\ncombo\norson\n##dication\nmyriad\n1707\nsilenced\n##fed\n##rh\ncoco\nnetball\
nyourselves\n##oza\nclarify\nheller\npeg\ndurban\netudes\noffender\nroast\nblackmail\ncurvature\n##woods\nvile\n309\nillicit\nsuriname\n##linson\noverture\n1685\nbubbling\ngymnast\ntucking\n##mming\n##ouin\nmaldives\n##bala\ngurney\n##dda\n##eased\n##oides\nbackside\npinto\njars\nracehorse\ntending\n##rdial\nbaronetcy\nwiener\nduly\n##rke\nbarbarian\ncupping\nflawed\n##thesis\nbertha\npleistocene\npuddle\nswearing\n##nob\n##tically\nfleeting\nprostate\namulet\neducating\n##mined\n##iti\n##tler\n75th\njens\nrespondents\nanalytics\ncavaliers\npapacy\nraju\n##iente\n##ulum\n##tip\nfunnel\n271\ndisneyland\n##lley\nsociologist\n##iam\n2500\nfaulkner\nlouvre\nmenon\n##dson\n276\n##ower\nafterlife\nmannheim\npeptide\nreferees\ncomedians\nmeaningless\n##anger\n##laise\nfabrics\nhurley\nrenal\nsleeps\n##bour\n##icle\nbreakout\nkristin\nroadside\nanimator\nclover\ndisdain\nunsafe\nredesign\n##urity\nfirth\nbarnsley\nportage\nreset\nnarrows\n268\ncommandos\nexpansive\nspeechless\ntubular\n##lux\nessendon\neyelashes\nsmashwords\n##yad\n##bang\n##claim\ncraved\nsprinted\nchet\nsomme\nastor\nwrocław\norton\n266\nbane\n##erving\n##uing\nmischief\n##amps\n##sund\nscaling\nterre\n##xious\nimpairment\noffenses\nundermine\nmoi\nsoy\ncontiguous\narcadia\ninuit\nseam\n##tops\nmacbeth\nrebelled\n##icative\n##iot\n590\nelaborated\nfrs\nuniformed\n##dberg\n259\npowerless\npriscilla\nstimulated\n980\nqc\narboretum\nfrustrating\ntrieste\nbullock\n##nified\nenriched\nglistening\nintern\n##adia\nlocus\nnouvelle\nollie\nike\nlash\nstarboard\nee\ntapestry\nheadlined\nhove\nrigged\n##vite\npollock\n##yme\nthrive\nclustered\ncas\nroi\ngleamed\nolympiad\n##lino\npressured\nregimes\n##hosis\n##lick\nripley\n##ophone\nkickoff\ngallon\nrockwell\n##arable\ncrusader\nglue\nrevolutions\nscrambling\n1714\ngrover\n##jure\nenglishman\naztec\n263\ncontemplating\ncoven\nipad\npreach\ntriumphant\ntufts\n##esian\nrotational\n##phus\n328\nfalkland\n##brates\nstrewn\nclarissa\nrejoin\nenvironmentally\nglint\nband
ed\ndrenched\nmoat\nalbanians\njohor\nrr\nmaestro\nmalley\nnouveau\nshaded\ntaxonomy\nv6\nadhere\nbunk\nairfields\n##ritan\n1741\nencompass\nremington\ntran\n##erative\namelie\nmazda\nfriar\nmorals\npassions\n##zai\nbreadth\nvis\n##hae\nargus\nburnham\ncaressing\ninsider\nrudd\n##imov\n##mini\n##rso\nitalianate\nmurderous\ntextual\nwainwright\narmada\nbam\nweave\ntimer\n##taken\n##nh\nfra\n##crest\nardent\nsalazar\ntaps\ntunis\n##ntino\nallegro\ngland\nphilanthropic\n##chester\nimplication\n##optera\nesq\njudas\nnoticeably\nwynn\n##dara\ninched\nindexed\ncrises\nvilliers\nbandit\nroyalties\npatterned\ncupboard\ninterspersed\naccessory\nisla\nkendrick\nentourage\nstitches\n##esthesia\nheadwaters\n##ior\ninterlude\ndistraught\ndraught\n1727\n##basket\nbiased\nsy\ntransient\ntriad\nsubgenus\nadapting\nkidd\nshortstop\n##umatic\ndimly\nspiked\nmcleod\nreprint\nnellie\npretoria\nwindmill\n##cek\nsingled\n##mps\n273\nreunite\n##orous\n747\nbankers\noutlying\n##omp\n##ports\n##tream\napologies\ncosmetics\npatsy\n##deh\n##ocks\n##yson\nbender\nnantes\nserene\n##nad\nlucha\nmmm\n323\n##cius\n##gli\ncmll\ncoinage\nnestor\njuarez\n##rook\nsmeared\nsprayed\ntwitching\nsterile\nirina\nembodied\njuveniles\nenveloped\nmiscellaneous\ncancers\ndq\ngulped\nluisa\ncrested\nswat\ndonegal\nref\n##anov\n##acker\nhearst\nmercantile\n##lika\ndoorbell\nua\nvicki\n##alla\n##som\nbilbao\npsychologists\nstryker\nsw\nhorsemen\nturkmenistan\nwits\n##national\nanson\nmathew\nscreenings\n##umb\nrihanna\n##agne\n##nessy\naisles\n##iani\n##osphere\nhines\nkenton\nsaskatoon\ntasha\ntruncated\n##champ\n##itan\nmildred\nadvises\nfredrik\ninterpreting\ninhibitors\n##athi\nspectroscopy\n##hab\n##kong\nkarim\npanda\n##oia\n##nail\n##vc\nconqueror\nkgb\nleukemia\n##dity\narrivals\ncheered\npisa\nphosphorus\nshielded\n##riated\nmammal\nunitarian\nurgently\nchopin\nsanitary\n##mission\nspicy\ndrugged\nhinges\n##tort\ntipping\ntrier\nimpoverished\nwestchester\n##caster\n267\nepoch\nnonstop\n##gman\n##khov\nar
omatic\ncentrally\ncerro\n##tively\n##vio\nbillions\nmodulation\nsedimentary\n283\nfacilitating\noutrageous\ngoldstein\n##eak\n##kt\nld\nmaitland\npenultimate\npollard\n##dance\nfleets\nspaceship\nvertebrae\n##nig\nalcoholism\nals\nrecital\n##bham\n##ference\n##omics\nm2\n##bm\ntrois\n##tropical\n##в\ncommemorates\n##meric\nmarge\n##raction\n1643\n670\ncosmetic\nravaged\n##ige\ncatastrophe\neng\n##shida\nalbrecht\narterial\nbellamy\ndecor\nharmon\n##rde\nbulbs\nsynchronized\nvito\neasiest\nshetland\nshielding\nwnba\n##glers\n##ssar\n##riam\nbrianna\ncumbria\n##aceous\n##rard\ncores\nthayer\n##nsk\nbrood\nhilltop\nluminous\ncarts\nkeynote\nlarkin\nlogos\n##cta\n##ا\n##mund\n##quay\nlilith\ntinted\n277\nwrestle\nmobilization\n##uses\nsequential\nsiam\nbloomfield\ntakahashi\n274\n##ieving\npresenters\nringo\nblazed\nwitty\n##oven\n##ignant\ndevastation\nhaydn\nharmed\nnewt\ntherese\n##peed\ngershwin\nmolina\nrabbis\nsudanese\n001\ninnate\nrestarted\n##sack\n##fus\nslices\nwb\n##shah\nenroll\nhypothetical\nhysterical\n1743\nfabio\nindefinite\nwarped\n##hg\nexchanging\n525\nunsuitable\n##sboro\ngallo\n1603\nbret\ncobalt\nhomemade\n##hunter\nmx\noperatives\n##dhar\nterraces\ndurable\nlatch\npens\nwhorls\n##ctuated\n##eaux\nbilling\nligament\nsuccumbed\n##gly\nregulators\nspawn\n##brick\n##stead\nfilmfare\nrochelle\n##nzo\n1725\ncircumstance\nsaber\nsupplements\n##nsky\n##tson\ncrowe\nwellesley\ncarrot\n##9th\n##movable\nprimate\ndrury\nsincerely\ntopical\n##mad\n##rao\ncallahan\nkyiv\nsmarter\ntits\nundo\n##yeh\nannouncements\nanthologies\nbarrio\nnebula\n##islaus\n##shaft\n##tyn\nbodyguards\n2021\nassassinate\nbarns\nemmett\nscully\n##mah\n##yd\n##eland\n##tino\n##itarian\ndemoted\ngorman\nlashed\nprized\nadventist\nwrit\n##gui\nalla\ninvertebrates\n##ausen\n1641\namman\n1742\nalign\nhealy\nredistribution\n##gf\n##rize\ninsulation\n##drop\nadherents\nhezbollah\nvitro\nferns\nyanking\n269\nphp\nregistering\nuppsala\ncheerleading\nconfines\nmischievous\ntully\n##ross\n49th
\ndocked\nroam\nstipulated\npumpkin\n##bry\nprompt\n##ezer\nblindly\nshuddering\ncraftsmen\nfrail\nscented\nkatharine\nscramble\nshaggy\nsponge\nhelix\nzaragoza\n279\n##52\n43rd\nbacklash\nfontaine\nseizures\nposse\ncowan\nnonfiction\ntelenovela\nwwii\nhammered\nundone\n##gpur\nencircled\nirs\n##ivation\nartefacts\noneself\nsearing\nsmallpox\n##belle\n##osaurus\nshandong\nbreached\nupland\nblushing\nrankin\ninfinitely\npsyche\ntolerated\ndocking\nevicted\n##col\nunmarked\n##lving\ngnome\nlettering\nlitres\nmusique\n##oint\nbenevolent\n##jal\nblackened\n##anna\nmccall\nracers\ntingle\n##ocene\n##orestation\nintroductions\nradically\n292\n##hiff\n##باد\n1610\n1739\nmunchen\nplead\n##nka\ncondo\nscissors\n##sight\n##tens\napprehension\n##cey\n##yin\nhallmark\nwatering\nformulas\nsequels\n##llas\naggravated\nbae\ncommencing\n##building\nenfield\nprohibits\nmarne\nvedic\ncivilized\neuclidean\njagger\nbeforehand\nblasts\ndumont\n##arney\n##nem\n740\nconversions\nhierarchical\nrios\nsimulator\n##dya\n##lellan\nhedges\noleg\nthrusts\nshadowed\ndarby\nmaximize\n1744\ngregorian\n##nded\n##routed\nsham\nunspecified\n##hog\nemory\nfactual\n##smo\n##tp\nfooled\n##rger\nortega\nwellness\nmarlon\n##oton\n##urance\ncasket\nkeating\nley\nenclave\n##ayan\nchar\ninfluencing\njia\n##chenko\n412\nammonia\nerebidae\nincompatible\nviolins\ncornered\n##arat\ngrooves\nastronauts\ncolumbian\nrampant\nfabrication\nkyushu\nmahmud\nvanish\n##dern\nmesopotamia\n##lete\nict\n##rgen\ncaspian\nkenji\npitted\n##vered\n999\ngrimace\nroanoke\ntchaikovsky\ntwinned\n##analysis\n##awan\nxinjiang\narias\nclemson\nkazakh\nsizable\n1662\n##khand\n##vard\nplunge\ntatum\nvittorio\n##nden\ncholera\n##dana\n##oper\nbracing\nindifference\nprojectile\nsuperliga\n##chee\nrealises\nupgrading\n299\nporte\nretribution\n##vies\nnk\nstil\n##resses\nama\nbureaucracy\nblackberry\nbosch\ntestosterone\ncollapses\ngreer\n##pathic\nioc\nfifties\nmalls\n##erved\nbao\nbaskets\nadolescents\nsiegfried\n##osity\n##tosis\nmantra\n
detecting\nexistent\nfledgling\n##cchi\ndissatisfied\ngan\ntelecommunication\nmingled\nsobbed\n6000\ncontroversies\noutdated\ntaxis\n##raus\nfright\nslams\n##lham\n##fect\n##tten\ndetectors\nfetal\ntanned\n##uw\nfray\ngoth\nolympian\nskipping\nmandates\nscratches\nsheng\nunspoken\nhyundai\ntracey\nhotspur\nrestrictive\n##buch\namericana\nmundo\n##bari\nburroughs\ndiva\nvulcan\n##6th\ndistinctions\nthumping\n##ngen\nmikey\nsheds\nfide\nrescues\nspringsteen\nvested\nvaluation\n##ece\n##ely\npinnacle\nrake\nsylvie\n##edo\nalmond\nquivering\n##irus\nalteration\nfaltered\n##wad\n51st\nhydra\nticked\n##kato\nrecommends\n##dicated\nantigua\narjun\nstagecoach\nwilfred\ntrickle\npronouns\n##pon\naryan\nnighttime\n##anian\ngall\npea\nstitch\n##hei\nleung\nmilos\n##dini\neritrea\nnexus\nstarved\nsnowfall\nkant\nparasitic\ncot\ndiscus\nhana\nstrikers\nappleton\nkitchens\n##erina\n##partisan\n##itha\n##vius\ndisclose\nmetis\n##channel\n1701\ntesla\n##vera\nfitch\n1735\nblooded\n##tila\ndecimal\n##tang\n##bai\ncyclones\neun\nbottled\npeas\npensacola\nbasha\nbolivian\ncrabs\nboil\nlanterns\npartridge\nroofed\n1645\nnecks\n##phila\nopined\npatting\n##kla\n##lland\nchuckles\nvolta\nwhereupon\n##nche\ndevout\neuroleague\nsuicidal\n##dee\ninherently\ninvoluntary\nknitting\nnasser\n##hide\npuppets\ncolourful\ncourageous\nsouthend\nstills\nmiraculous\nhodgson\nricher\nrochdale\nethernet\ngreta\nuniting\nprism\numm\n##haya\n##itical\n##utation\ndeterioration\npointe\nprowess\n##ropriation\nlids\nscranton\nbillings\nsubcontinent\n##koff\n##scope\nbrute\nkellogg\npsalms\ndegraded\n##vez\nstanisław\n##ructured\nferreira\npun\nastonishing\ngunnar\n##yat\narya\nprc\ngottfried\n##tight\nexcursion\n##ographer\ndina\n##quil\n##nare\nhuffington\nillustrious\nwilbur\ngundam\nverandah\n##zard\nnaacp\n##odle\nconstructive\nfjord\nkade\n##naud\ngenerosity\nthrilling\nbaseline\ncayman\nfrankish\nplastics\naccommodations\nzoological\n##fting\ncedric\nqb\nmotorized\n##dome\n##otted\nsquealed\ntackled\nc
anucks\nbudgets\nsitu\nasthma\ndail\ngabled\ngrasslands\nwhimpered\nwrithing\njudgments\n##65\nminnie\npv\n##carbon\nbananas\ngrille\ndomes\nmonique\nodin\nmaguire\nmarkham\ntierney\n##estra\n##chua\nlibel\npoke\nspeedy\natrium\nlaval\nnotwithstanding\n##edly\nfai\nkala\n##sur\nrobb\n##sma\nlistings\nluz\nsupplementary\ntianjin\n##acing\nenzo\njd\nric\nscanner\ncroats\ntranscribed\n##49\narden\ncv\n##hair\n##raphy\n##lver\n##uy\n357\nseventies\nstaggering\nalam\nhorticultural\nhs\nregression\ntimbers\nblasting\n##ounded\nmontagu\nmanipulating\n##cit\ncatalytic\n1550\ntroopers\n##meo\ncondemnation\nfitzpatrick\n##oire\n##roved\ninexperienced\n1670\ncastes\n##lative\nouting\n314\ndubois\nflicking\nquarrel\nste\nlearners\n1625\niq\nwhistled\n##class\n282\nclassify\ntariffs\ntemperament\n355\nfolly\nliszt\n##yles\nimmersed\njordanian\nceasefire\napparel\nextras\nmaru\nfished\n##bio\nharta\nstockport\nassortment\ncraftsman\nparalysis\ntransmitters\n##cola\nblindness\n##wk\nfatally\nproficiency\nsolemnly\n##orno\nrepairing\namore\ngroceries\nultraviolet\n##chase\nschoolhouse\n##tua\nresurgence\nnailed\n##otype\n##×\nruse\nsaliva\ndiagrams\n##tructing\nalbans\nrann\nthirties\n1b\nantennas\nhilarious\ncougars\npaddington\nstats\n##eger\nbreakaway\nipod\nreza\nauthorship\nprohibiting\nscoffed\n##etz\n##ttle\nconscription\ndefected\ntrondheim\n##fires\nivanov\nkeenan\n##adan\n##ciful\n##fb\n##slow\nlocating\n##ials\n##tford\ncadiz\nbasalt\nblankly\ninterned\nrags\nrattling\n##tick\ncarpathian\nreassured\nsync\nbum\nguildford\niss\nstaunch\n##onga\nastronomers\nsera\nsofie\nemergencies\nsusquehanna\n##heard\nduc\nmastery\nvh1\nwilliamsburg\nbayer\nbuckled\ncraving\n##khan\n##rdes\nbloomington\n##write\nalton\nbarbecue\n##bians\njustine\n##hri\n##ndt\ndelightful\nsmartphone\nnewtown\nphoton\nretrieval\npeugeot\nhissing\n##monium\n##orough\nflavors\nlighted\nrelaunched\ntainted\n##games\n##lysis\nanarchy\nmicroscopic\nhopping\nadept\nevade\nevie\n##beau\ninhibit\nsinn\nadjustabl
e\nhurst\nintuition\nwilton\ncisco\n44th\nlawful\nlowlands\nstockings\nthierry\n##dalen\n##hila\n##nai\nfates\nprank\ntb\nmaison\nlobbied\nprovocative\n1724\n4a\nutopia\n##qual\ncarbonate\ngujarati\npurcell\n##rford\ncurtiss\n##mei\novergrown\narenas\nmediation\nswallows\n##rnik\nrespectful\nturnbull\n##hedron\n##hope\nalyssa\nozone\n##ʻi\nami\ngestapo\njohansson\nsnooker\ncanteen\ncuff\ndeclines\nempathy\nstigma\n##ags\n##iner\n##raine\ntaxpayers\ngui\nvolga\n##wright\n##copic\nlifespan\novercame\ntattooed\nenactment\ngiggles\n##ador\n##camp\nbarrington\nbribe\nobligatory\norbiting\npeng\n##enas\nelusive\nsucker\n##vating\ncong\nhardship\nempowered\nanticipating\nestrada\ncryptic\ngreasy\ndetainees\nplanck\nsudbury\nplaid\ndod\nmarriott\nkayla\n##ears\n##vb\n##zd\nmortally\n##hein\ncognition\nradha\n319\nliechtenstein\nmeade\nrichly\nargyle\nharpsichord\nliberalism\ntrumpets\nlauded\ntyrant\nsalsa\ntiled\nlear\npromoters\nreused\nslicing\ntrident\n##chuk\n##gami\n##lka\ncantor\ncheckpoint\n##points\ngaul\nleger\nmammalian\n##tov\n##aar\n##schaft\ndoha\nfrenchman\nnirvana\n##vino\ndelgado\nheadlining\n##eron\n##iography\njug\ntko\n1649\nnaga\nintersections\n##jia\nbenfica\nnawab\n##suka\nashford\ngulp\n##deck\n##vill\n##rug\nbrentford\nfrazier\npleasures\ndunne\npotsdam\nshenzhen\ndentistry\n##tec\nflanagan\n##dorff\n##hear\nchorale\ndinah\nprem\nquezon\n##rogated\nrelinquished\nsutra\nterri\n##pani\nflaps\n##rissa\npoly\n##rnet\nhomme\naback\n##eki\nlinger\nwomb\n##kson\n##lewood\ndoorstep\northodoxy\nthreaded\nwestfield\n##rval\ndioceses\nfridays\nsubsided\n##gata\nloyalists\n##biotic\n##ettes\nletterman\nlunatic\nprelate\ntenderly\ninvariably\nsouza\nthug\nwinslow\n##otide\nfurlongs\ngogh\njeopardy\n##runa\npegasus\n##umble\nhumiliated\nstandalone\ntagged\n##roller\nfreshmen\nklan\n##bright\nattaining\ninitiating\ntransatlantic\nlogged\nviz\n##uance\n1723\ncombatants\nintervening\nstephane\nchieftain\ndespised\ngrazed\n317\ncdc\ngalveston\ngodzilla\nmacro\nsimula
te\n##planes\nparades\n##esses\n960\n##ductive\n##unes\nequator\noverdose\n##cans\n##hosh\n##lifting\njoshi\nepstein\nsonora\ntreacherous\naquatics\nmanchu\nresponsive\n##sation\nsupervisory\n##christ\n##llins\n##ibar\n##balance\n##uso\nkimball\nkarlsruhe\nmab\n##emy\nignores\nphonetic\nreuters\nspaghetti\n820\nalmighty\ndanzig\nrumbling\ntombstone\ndesignations\nlured\noutset\n##felt\nsupermarkets\n##wt\ngrupo\nkei\nkraft\nsusanna\n##blood\ncomprehension\ngenealogy\n##aghan\n##verted\nredding\n##ythe\n1722\nbowing\n##pore\n##roi\nlest\nsharpened\nfulbright\nvalkyrie\nsikhs\n##unds\nswans\nbouquet\nmerritt\n##tage\n##venting\ncommuted\nredhead\nclerks\nleasing\ncesare\ndea\nhazy\n##vances\nfledged\ngreenfield\nservicemen\n##gical\narmando\nblackout\ndt\nsagged\ndownloadable\nintra\npotion\npods\n##4th\n##mism\nxp\nattendants\ngambia\nstale\n##ntine\nplump\nasteroids\nrediscovered\nbuds\nflea\nhive\n##neas\n1737\nclassifications\ndebuts\n##eles\nolympus\nscala\n##eurs\n##gno\n##mute\nhummed\nsigismund\nvisuals\nwiggled\nawait\npilasters\nclench\nsulfate\n##ances\nbellevue\nenigma\ntrainee\nsnort\n##sw\nclouded\ndenim\n##rank\n##rder\nchurning\nhartman\nlodges\nriches\nsima\n##missible\naccountable\nsocrates\nregulates\nmueller\n##cr\n1702\navoids\nsolids\nhimalayas\nnutrient\npup\n##jevic\nsquat\nfades\nnec\n##lates\n##pina\n##rona\n##ου\nprivateer\ntequila\n##gative\n##mpton\napt\nhornet\nimmortals\n##dou\nasturias\ncleansing\ndario\n##rries\n##anta\netymology\nservicing\nzhejiang\n##venor\n##nx\nhorned\nerasmus\nrayon\nrelocating\n£10\n##bags\nescalated\npromenade\nstubble\n2010s\nartisans\naxial\nliquids\nmora\nsho\nyoo\n##tsky\nbundles\noldies\n##nally\nnotification\nbastion\n##ths\nsparkle\n##lved\n1728\nleash\npathogen\nhighs\n##hmi\nimmature\n880\ngonzaga\nignatius\nmansions\nmonterrey\nsweets\nbryson\n##loe\npolled\nregatta\nbrightest\npei\nrosy\nsquid\nhatfield\npayroll\naddict\nmeath\ncornerback\nheaviest\nlodging\n##mage\ncapcom\nrippled\n##sily\nbarnet\nm
ayhem\nymca\nsnuggled\nrousseau\n##cute\nblanchard\n284\nfragmented\nleighton\nchromosomes\nrisking\n##md\n##strel\n##utter\ncorinne\ncoyotes\ncynical\nhiroshi\nyeomanry\n##ractive\nebook\ngrading\nmandela\nplume\nagustin\nmagdalene\n##rkin\nbea\nfemme\ntrafford\n##coll\n##lun\n##tance\n52nd\nfourier\nupton\n##mental\ncamilla\ngust\niihf\nislamabad\nlongevity\n##kala\nfeldman\nnetting\n##rization\nendeavour\nforaging\nmfa\norr\n##open\ngreyish\ncontradiction\ngraz\n##ruff\nhandicapped\nmarlene\ntweed\noaxaca\nspp\ncampos\nmiocene\npri\nconfigured\ncooks\npluto\ncozy\npornographic\n##entes\n70th\nfairness\nglided\njonny\nlynne\nrounding\nsired\n##emon\n##nist\nremade\nuncover\n##mack\ncomplied\nlei\nnewsweek\n##jured\n##parts\n##enting\n##pg\n293\nfiner\nguerrillas\nathenian\ndeng\ndisused\nstepmother\naccuse\ngingerly\nseduction\n521\nconfronting\n##walker\n##going\ngora\nnostalgia\nsabres\nvirginity\nwrenched\n##minated\nsyndication\nwielding\neyre\n##56\n##gnon\n##igny\nbehaved\ntaxpayer\nsweeps\n##growth\nchildless\ngallant\n##ywood\namplified\ngeraldine\nscrape\n##ffi\nbabylonian\nfresco\n##rdan\n##kney\n##position\n1718\nrestricting\ntack\nfukuoka\nosborn\nselector\npartnering\n##dlow\n318\ngnu\nkia\ntak\nwhitley\ngables\n##54\n##mania\nmri\nsoftness\nimmersion\n##bots\n##evsky\n1713\nchilling\ninsignificant\npcs\n##uis\nelites\nlina\npurported\nsupplemental\nteaming\n##americana\n##dding\n##inton\nproficient\nrouen\n##nage\n##rret\nniccolo\nselects\n##bread\nfluffy\n1621\ngruff\nknotted\nmukherjee\npolgara\nthrash\nnicholls\nsecluded\nsmoothing\nthru\ncorsica\nloaf\nwhitaker\ninquiries\n##rrier\n##kam\nindochina\n289\nmarlins\nmyles\npeking\n##tea\nextracts\npastry\nsuperhuman\nconnacht\nvogel\n##ditional\n##het\n##udged\n##lash\ngloss\nquarries\nrefit\nteaser\n##alic\n##gaon\n20s\nmaterialized\nsling\ncamped\npickering\ntung\ntracker\npursuant\n##cide\ncranes\nsoc\n##cini\n##typical\n##viere\nanhalt\noverboard\nworkout\nchores\nfares\norphaned\nstains\n##logi
e\nfenton\nsurpassing\njoyah\ntriggers\n##itte\ngrandmaster\n##lass\n##lists\nclapping\nfraudulent\nledger\nnagasaki\n##cor\n##nosis\n##tsa\neucalyptus\ntun\n##icio\n##rney\n##tara\ndax\nheroism\nina\nwrexham\nonboard\nunsigned\n##dates\nmoshe\ngalley\nwinnie\ndroplets\nexiles\npraises\nwatered\nnoodles\n##aia\nfein\nadi\nleland\nmulticultural\nstink\nbingo\ncomets\nerskine\nmodernized\ncanned\nconstraint\ndomestically\nchemotherapy\nfeatherweight\nstifled\n##mum\ndarkly\nirresistible\nrefreshing\nhasty\nisolate\n##oys\nkitchener\nplanners\n##wehr\ncages\nyarn\nimplant\ntoulon\nelects\nchildbirth\nyue\n##lind\n##lone\ncn\nrightful\nsportsman\njunctions\nremodeled\nspecifies\n##rgh\n291\n##oons\ncomplimented\n##urgent\nlister\not\n##logic\nbequeathed\ncheekbones\nfontana\ngabby\n##dial\namadeus\ncorrugated\nmaverick\nresented\ntriangles\n##hered\n##usly\nnazareth\ntyrol\n1675\nassent\npoorer\nsectional\naegean\n##cous\n296\nnylon\nghanaian\n##egorical\n##weig\ncushions\nforbid\nfusiliers\nobstruction\nsomerville\n##scia\ndime\nearrings\nelliptical\nleyte\noder\npolymers\ntimmy\natm\nmidtown\npiloted\nsettles\ncontinual\nexternally\nmayfield\n##uh\nenrichment\nhenson\nkeane\npersians\n1733\nbenji\nbraden\npep\n324\n##efe\ncontenders\npepsi\nvalet\n##isches\n298\n##asse\n##earing\ngoofy\nstroll\n##amen\nauthoritarian\noccurrences\nadversary\nahmedabad\ntangent\ntoppled\ndorchester\n1672\nmodernism\nmarxism\nislamist\ncharlemagne\nexponential\nracks\nunicode\nbrunette\nmbc\npic\nskirmish\n##bund\n##lad\n##powered\n##yst\nhoisted\nmessina\nshatter\n##ctum\njedi\nvantage\n##music\n##neil\nclemens\nmahmoud\ncorrupted\nauthentication\nlowry\nnils\n##washed\nomnibus\nwounding\njillian\n##itors\n##opped\nserialized\nnarcotics\nhandheld\n##arm\n##plicity\nintersecting\nstimulating\n##onis\ncrate\nfellowships\nhemingway\ncasinos\nclimatic\nfordham\ncopeland\ndrip\nbeatty\nleaflets\nrobber\nbrothel\nmadeira\n##hedral\nsphinx\nultrasound\n##vana\nvalor\nforbade\nleonid\nvillas\n#
#aldo\nduane\nmarquez\n##cytes\ndisadvantaged\nforearms\nkawasaki\nreacts\nconsular\nlax\nuncles\nuphold\n##hopper\nconcepcion\ndorsey\nlass\n##izan\narching\npassageway\n1708\nresearches\ntia\ninternationals\n##graphs\n##opers\ndistinguishes\njavanese\ndivert\n##uven\nplotted\n##listic\n##rwin\n##erik\n##tify\naffirmative\nsignifies\nvalidation\n##bson\nkari\nfelicity\ngeorgina\nzulu\n##eros\n##rained\n##rath\novercoming\n##dot\nargyll\n##rbin\n1734\nchiba\nratification\nwindy\nearls\nparapet\n##marks\nhunan\npristine\nastrid\npunta\n##gart\nbrodie\n##kota\n##oder\nmalaga\nminerva\nrouse\n##phonic\nbellowed\npagoda\nportals\nreclamation\n##gur\n##odies\n##⁄₄\nparentheses\nquoting\nallergic\npalette\nshowcases\nbenefactor\nheartland\nnonlinear\n##tness\nbladed\ncheerfully\nscans\n##ety\n##hone\n1666\ngirlfriends\npedersen\nhiram\nsous\n##liche\n##nator\n1683\n##nery\n##orio\n##umen\nbobo\nprimaries\nsmiley\n##cb\nunearthed\nuniformly\nfis\nmetadata\n1635\nind\n##oted\nrecoil\n##titles\n##tura\n##ια\n406\nhilbert\njamestown\nmcmillan\ntulane\nseychelles\n##frid\nantics\ncoli\nfated\nstucco\n##grants\n1654\nbulky\naccolades\narrays\ncaledonian\ncarnage\noptimism\npuebla\n##tative\n##cave\nenforcing\nrotherham\nseo\ndunlop\naeronautics\nchimed\nincline\nzoning\narchduke\nhellenistic\n##oses\n##sions\ncandi\nthong\n##ople\nmagnate\nrustic\n##rsk\nprojective\nslant\n##offs\ndanes\nhollis\nvocalists\n##ammed\ncongenital\ncontend\ngesellschaft\n##ocating\n##pressive\ndouglass\nquieter\n##cm\n##kshi\nhowled\nsalim\nspontaneously\ntownsville\nbuena\nsouthport\n##bold\nkato\n1638\nfaerie\nstiffly\n##vus\n##rled\n297\nflawless\nrealising\ntaboo\n##7th\nbytes\nstraightening\n356\njena\n##hid\n##rmin\ncartwright\nberber\nbertram\nsoloists\n411\nnoses\n417\ncoping\nfission\nhardin\ninca\n##cen\n1717\nmobilized\nvhf\n##raf\nbiscuits\ncurate\n##85\n##anial\n331\ngaunt\nneighbourhoods\n1540\n##abas\nblanca\nbypassed\nsockets\nbehold\ncoincidentally\n##bane\nnara\nshave\nsplinter\nte
rrific\n##arion\n##erian\ncommonplace\njuris\nredwood\nwaistband\nboxed\ncaitlin\nfingerprints\njennie\nnaturalized\n##ired\nbalfour\ncraters\njody\nbungalow\nhugely\nquilt\nglitter\npigeons\nundertaker\nbulging\nconstrained\ngoo\n##sil\n##akh\nassimilation\nreworked\n##person\npersuasion\n##pants\nfelicia\n##cliff\n##ulent\n1732\nexplodes\n##dun\n##inium\n##zic\nlyman\nvulture\nhog\noverlook\nbegs\nnorthwards\now\nspoil\n##urer\nfatima\nfavorably\naccumulate\nsargent\nsorority\ncorresponded\ndispersal\nkochi\ntoned\n##imi\n##lita\ninternacional\nnewfound\n##agger\n##lynn\n##rigue\nbooths\npeanuts\n##eborg\nmedicare\nmuriel\nnur\n##uram\ncrates\nmillennia\npajamas\nworsened\n##breakers\njimi\nvanuatu\nyawned\n##udeau\ncarousel\n##hony\nhurdle\n##ccus\n##mounted\n##pod\nrv\n##eche\nairship\nambiguity\ncompulsion\nrecapture\n##claiming\narthritis\n##osomal\n1667\nasserting\nngc\nsniffing\ndade\ndiscontent\nglendale\nported\n##amina\ndefamation\nrammed\n##scent\nfling\nlivingstone\n##fleet\n875\n##ppy\napocalyptic\ncomrade\nlcd\n##lowe\ncessna\neine\npersecuted\nsubsistence\ndemi\nhoop\nreliefs\n710\ncoptic\nprogressing\nstemmed\nperpetrators\n1665\npriestess\n##nio\ndobson\nebony\nrooster\nitf\ntortricidae\n##bbon\n##jian\ncleanup\n##jean\n##øy\n1721\neighties\ntaxonomic\nholiness\n##hearted\n##spar\nantilles\nshowcasing\nstabilized\n##nb\ngia\nmascara\nmichelangelo\ndawned\n##uria\n##vinsky\nextinguished\nfitz\ngrotesque\n£100\n##fera\n##loid\n##mous\nbarges\nneue\nthrobbed\ncipher\njohnnie\n##a1\n##mpt\noutburst\n##swick\nspearheaded\nadministrations\nc1\nheartbreak\npixels\npleasantly\n##enay\nlombardy\nplush\n##nsed\nbobbie\n##hly\nreapers\ntremor\nxiang\nminogue\nsubstantive\nhitch\nbarak\n##wyl\nkwan\n##encia\n910\nobscene\nelegance\nindus\nsurfer\nbribery\nconserve\n##hyllum\n##masters\nhoratio\n##fat\napes\nrebound\npsychotic\n##pour\niteration\n##mium\n##vani\nbotanic\nhorribly\nantiques\ndispose\npaxton\n##hli\n##wg\ntimeless\n1704\ndisregard\nengraver\nhoun
ds\n##bau\n##version\nlooted\nuno\nfacilitates\ngroans\nmasjid\nrutland\nantibody\ndisqualification\ndecatur\nfootballers\nquake\nslacks\n48th\nrein\nscribe\nstabilize\ncommits\nexemplary\ntho\n##hort\n##chison\npantry\ntraversed\n##hiti\ndisrepair\nidentifiable\nvibrated\nbaccalaureate\n##nnis\ncsa\ninterviewing\n##iensis\n##raße\ngreaves\nwealthiest\n343\nclassed\njogged\n£5\n##58\n##atal\nilluminating\nknicks\nrespecting\n##uno\nscrubbed\n##iji\n##dles\nkruger\nmoods\ngrowls\nraider\nsilvia\nchefs\nkam\nvr\ncree\npercival\n##terol\ngunter\ncounterattack\ndefiant\nhenan\nze\n##rasia\n##riety\nequivalence\nsubmissions\n##fra\n##thor\nbautista\nmechanically\n##heater\ncornice\nherbal\ntemplar\n##mering\noutputs\nruining\nligand\nrenumbered\nextravagant\nmika\nblockbuster\neta\ninsurrection\n##ilia\ndarkening\nferocious\npianos\nstrife\nkinship\n##aer\nmelee\n##anor\n##iste\n##may\n##oue\ndecidedly\nweep\n##jad\n##missive\n##ppel\n354\npuget\nunease\n##gnant\n1629\nhammering\nkassel\nob\nwessex\n##lga\nbromwich\negan\nparanoia\nutilization\n##atable\n##idad\ncontradictory\nprovoke\n##ols\n##ouring\n##tangled\nknesset\n##very\n##lette\nplumbing\n##sden\n##¹\ngreensboro\noccult\nsniff\n338\nzev\nbeaming\ngamer\nhaggard\nmahal\n##olt\n##pins\nmendes\nutmost\nbriefing\ngunnery\n##gut\n##pher\n##zh\n##rok\n1679\nkhalifa\nsonya\n##boot\nprincipals\nurbana\nwiring\n##liffe\n##minating\n##rrado\ndahl\nnyu\nskepticism\nnp\ntownspeople\nithaca\nlobster\nsomethin\n##fur\n##arina\n##−1\nfreighter\nzimmerman\nbiceps\ncontractual\n##herton\namend\nhurrying\nsubconscious\n##anal\n336\nmeng\nclermont\nspawning\n##eia\n##lub\ndignitaries\nimpetus\nsnacks\nspotting\ntwigs\n##bilis\n##cz\n##ouk\nlibertadores\nnic\nskylar\n##aina\n##firm\ngustave\nasean\n##anum\ndieter\nlegislatures\nflirt\nbromley\ntrolls\numar\n##bbies\n##tyle\nblah\nparc\nbridgeport\ncrank\nnegligence\n##nction\n46th\nconstantin\nmolded\nbandages\nseriousness\n00pm\nsiegel\ncarpets\ncompartments\nupbeat\nstatehood\n#
#dner\n##edging\nmarko\n730\nplatt\n##hane\npaving\n##iy\n1738\nabbess\nimpatience\nlimousine\nnbl\n##talk\n441\nlucille\nmojo\nnightfall\nrobbers\n##nais\nkarel\nbrisk\ncalves\nreplicate\nascribed\ntelescopes\n##olf\nintimidated\n##reen\nballast\nspecialization\n##sit\naerodynamic\ncaliphate\nrainer\nvisionary\n##arded\nepsilon\n##aday\n##onte\naggregation\nauditory\nboosted\nreunification\nkathmandu\nloco\nrobyn\n402\nacknowledges\nappointing\nhumanoid\nnewell\nredeveloped\nrestraints\n##tained\nbarbarians\nchopper\n1609\nitaliana\n##lez\n##lho\ninvestigates\nwrestlemania\n##anies\n##bib\n690\n##falls\ncreaked\ndragoons\ngravely\nminions\nstupidity\nvolley\n##harat\n##week\nmusik\n##eries\n##uously\nfungal\nmassimo\nsemantics\nmalvern\n##ahl\n##pee\ndiscourage\nembryo\nimperialism\n1910s\nprofoundly\n##ddled\njiangsu\nsparkled\nstat\n##holz\nsweatshirt\ntobin\n##iction\nsneered\n##cheon\n##oit\nbrit\ncausal\nsmyth\n##neuve\ndiffuse\nperrin\nsilvio\n##ipes\n##recht\ndetonated\niqbal\nselma\n##nism\n##zumi\nroasted\n##riders\ntay\n##ados\n##mament\n##mut\n##rud\n840\ncompletes\nnipples\ncfa\nflavour\nhirsch\n##laus\ncalderon\nsneakers\nmoravian\n##ksha\n1622\nrq\n294\n##imeters\nbodo\n##isance\n##pre\n##ronia\nanatomical\nexcerpt\n##lke\ndh\nkunst\n##tablished\n##scoe\nbiomass\npanted\nunharmed\ngael\nhousemates\nmontpellier\n##59\ncoa\nrodents\ntonic\nhickory\nsingleton\n##taro\n451\n1719\naldo\nbreaststroke\ndempsey\noch\nrocco\n##cuit\nmerton\ndissemination\nmidsummer\nserials\n##idi\nhaji\npolynomials\n##rdon\ngs\nenoch\nprematurely\nshutter\ntaunton\n£3\n##grating\n##inates\narchangel\nharassed\n##asco\n326\narchway\ndazzling\n##ecin\n1736\nsumo\nwat\n##kovich\n1086\nhonneur\n##ently\n##nostic\n##ttal\n##idon\n1605\n403\n1716\nblogger\nrents\n##gnan\nhires\n##ikh\n##dant\nhowie\n##rons\nhandler\nretracted\nshocks\n1632\narun\nduluth\nkepler\ntrumpeter\n##lary\npeeking\nseasoned\ntrooper\n##mara\nlaszlo\n##iciencies\n##rti\nheterosexual\n##inatory\n##ssion\nindi
ra\njogging\n##inga\n##lism\nbeit\ndissatisfaction\nmalice\n##ately\nnedra\npeeling\n##rgeon\n47th\nstadiums\n475\nvertigo\n##ains\niced\nrestroom\n##plify\n##tub\nillustrating\npear\n##chner\n##sibility\ninorganic\nrappers\nreceipts\nwatery\n##kura\nlucinda\n##oulos\nreintroduced\n##8th\n##tched\ngracefully\nsaxons\nnutritional\nwastewater\nrained\nfavourites\nbedrock\nfisted\nhallways\nlikeness\nupscale\n##lateral\n1580\nblinds\nprequel\n##pps\n##tama\ndeter\nhumiliating\nrestraining\ntn\nvents\n1659\nlaundering\nrecess\nrosary\ntractors\ncoulter\nfederer\n##ifiers\n##plin\npersistence\n##quitable\ngeschichte\npendulum\nquakers\n##beam\nbassett\npictorial\nbuffet\nkoln\n##sitor\ndrills\nreciprocal\nshooters\n##57\n##cton\n##tees\nconverge\npip\ndmitri\ndonnelly\nyamamoto\naqua\nazores\ndemographics\nhypnotic\nspitfire\nsuspend\nwryly\nroderick\n##rran\nsebastien\n##asurable\nmavericks\n##fles\n##200\nhimalayan\nprodigy\n##iance\ntransvaal\ndemonstrators\nhandcuffs\ndodged\nmcnamara\nsublime\n1726\ncrazed\n##efined\n##till\nivo\npondered\nreconciled\nshrill\nsava\n##duk\nbal\ncad\nheresy\njaipur\ngoran\n##nished\n341\nlux\nshelly\nwhitehall\n##hre\nisraelis\npeacekeeping\n##wled\n1703\ndemetrius\nousted\n##arians\n##zos\nbeale\nanwar\nbackstroke\nraged\nshrinking\ncremated\n##yck\nbenign\ntowing\nwadi\ndarmstadt\nlandfill\nparana\nsoothe\ncolleen\nsidewalks\nmayfair\ntumble\nhepatitis\nferrer\nsuperstructure\n##gingly\n##urse\n##wee\nanthropological\ntranslators\n##mies\ncloseness\nhooves\n##pw\nmondays\n##roll\n##vita\nlandscaping\n##urized\npurification\nsock\nthorns\nthwarted\njalan\ntiberius\n##taka\nsaline\n##rito\nconfidently\nkhyber\nsculptors\n##ij\nbrahms\nhammersmith\ninspectors\nbattista\nfivb\nfragmentation\nhackney\n##uls\narresting\nexercising\nantoinette\nbedfordshire\n##zily\ndyed\n##hema\n1656\nracetrack\nvariability\n##tique\n1655\naustrians\ndeteriorating\nmadman\ntheorists\naix\nlehman\nweathered\n1731\ndecreed\neruptions\n1729\nflaw\nquinlan\ns
orbonne\nflutes\nnunez\n1711\nadored\ndownwards\nfable\nrasped\n1712\nmoritz\nmouthful\nrenegade\nshivers\nstunts\ndysfunction\nrestrain\ntranslit\n327\npancakes\n##avio\n##cision\n##tray\n351\nvial\n##lden\nbain\n##maid\n##oxide\nchihuahua\nmalacca\nvimes\n##rba\n##rnier\n1664\ndonnie\nplaques\n##ually\n337\nbangs\nfloppy\nhuntsville\nloretta\nnikolay\n##otte\neater\nhandgun\nubiquitous\n##hett\neras\nzodiac\n1634\n##omorphic\n1820s\n##zog\ncochran\n##bula\n##lithic\nwarring\n##rada\ndalai\nexcused\nblazers\nmcconnell\nreeling\nbot\neste\n##abi\ngeese\nhoax\ntaxon\n##bla\nguitarists\n##icon\ncondemning\nhunts\ninversion\nmoffat\ntaekwondo\n##lvis\n1624\nstammered\n##rest\n##rzy\nsousa\nfundraiser\nmarylebone\nnavigable\nuptown\ncabbage\ndaniela\nsalman\nshitty\nwhimper\n##kian\n##utive\nprogrammers\nprotections\nrm\n##rmi\n##rued\nforceful\n##enes\nfuss\n##tao\n##wash\nbrat\noppressive\nreykjavik\nspartak\nticking\n##inkles\n##kiewicz\nadolph\nhorst\nmaui\nprotege\nstraighten\ncpc\nlandau\nconcourse\nclements\nresultant\n##ando\nimaginative\njoo\nreactivated\n##rem\n##ffled\n##uising\nconsultative\n##guide\nflop\nkaitlyn\nmergers\nparenting\nsomber\n##vron\nsupervise\nvidhan\n##imum\ncourtship\nexemplified\nharmonies\nmedallist\nrefining\n##rrow\n##ка\namara\n##hum\n780\ngoalscorer\nsited\novershadowed\nrohan\ndispleasure\nsecretive\nmultiplied\nosman\n##orth\nengravings\npadre\n##kali\n##veda\nminiatures\nmis\n##yala\nclap\npali\nrook\n##cana\n1692\n57th\nantennae\nastro\noskar\n1628\nbulldog\ncrotch\nhackett\nyucatan\n##sure\namplifiers\nbrno\nferrara\nmigrating\n##gree\nthanking\nturing\n##eza\nmccann\nting\nandersson\nonslaught\ngaines\nganga\nincense\nstandardization\n##mation\nsentai\nscuba\nstuffing\nturquoise\nwaivers\nalloys\n##vitt\nregaining\nvaults\n##clops\n##gizing\ndigger\nfurry\nmemorabilia\nprobing\n##iad\npayton\nrec\ndeutschland\nfilippo\nopaque\nseamen\nzenith\nafrikaans\n##filtration\ndisciplined\ninspirational\n##merie\nbanco\nconfuse\ngrafton
\ntod\n##dgets\nchampioned\nsimi\nanomaly\nbiplane\n##ceptive\nelectrode\n##para\n1697\ncleavage\ncrossbow\nswirl\ninformant\n##lars\n##osta\nafi\nbonfire\nspec\n##oux\nlakeside\nslump\n##culus\n##lais\n##qvist\n##rrigan\n1016\nfacades\nborg\ninwardly\ncervical\nxl\npointedly\n050\nstabilization\n##odon\nchests\n1699\nhacked\nctv\northogonal\nsuzy\n##lastic\ngaulle\njacobite\nrearview\n##cam\n##erted\nashby\n##drik\n##igate\n##mise\n##zbek\naffectionately\ncanine\ndisperse\nlatham\n##istles\n##ivar\nspielberg\n##orin\n##idium\nezekiel\ncid\n##sg\ndurga\nmiddletown\n##cina\ncustomized\nfrontiers\nharden\n##etano\n##zzy\n1604\nbolsheviks\n##66\ncoloration\nyoko\n##bedo\nbriefs\nslabs\ndebra\nliquidation\nplumage\n##oin\nblossoms\ndementia\nsubsidy\n1611\nproctor\nrelational\njerseys\nparochial\nter\n##ici\nesa\npeshawar\ncavalier\nloren\ncpi\nidiots\nshamrock\n1646\ndutton\nmalabar\nmustache\n##endez\n##ocytes\nreferencing\nterminates\nmarche\nyarmouth\n##sop\nacton\nmated\nseton\nsubtly\nbaptised\nbeige\nextremes\njolted\nkristina\ntelecast\n##actic\nsafeguard\nwaldo\n##baldi\n##bular\nendeavors\nsloppy\nsubterranean\n##ensburg\n##itung\ndelicately\npigment\ntq\n##scu\n1626\n##ound\ncollisions\ncoveted\nherds\n##personal\n##meister\n##nberger\nchopra\n##ricting\nabnormalities\ndefective\ngalician\nlucie\n##dilly\nalligator\nlikened\n##genase\nburundi\nclears\ncomplexion\nderelict\ndeafening\ndiablo\nfingered\nchampaign\ndogg\nenlist\nisotope\nlabeling\nmrna\n##erre\nbrilliance\nmarvelous\n##ayo\n1652\ncrawley\nether\nfooted\ndwellers\ndeserts\nhamish\nrubs\nwarlock\nskimmed\n##lizer\n870\nbuick\nembark\nheraldic\nirregularities\n##ajan\nkiara\n##kulam\n##ieg\nantigen\nkowalski\n##lge\noakley\nvisitation\n##mbit\nvt\n##suit\n1570\nmurderers\n##miento\n##rites\nchimneys\n##sling\ncondemn\ncuster\nexchequer\nhavre\n##ghi\nfluctuations\n##rations\ndfb\nhendricks\nvaccines\n##tarian\nnietzsche\nbiking\njuicy\n##duced\nbrooding\nscrolling\nselangor\n##ragan\n352\nannum\nbo
omed\nseminole\nsugarcane\n##dna\ndepartmental\ndismissing\ninnsbruck\narteries\nashok\nbatavia\ndaze\nkun\novertook\n##rga\n##tlan\nbeheaded\ngaddafi\nholm\nelectronically\nfaulty\ngalilee\nfractures\nkobayashi\n##lized\ngunmen\nmagma\naramaic\nmala\neastenders\ninference\nmessengers\nbf\n##qu\n407\nbathrooms\n##vere\n1658\nflashbacks\nideally\nmisunderstood\n##jali\n##weather\nmendez\n##grounds\n505\nuncanny\n##iii\n1709\nfriendships\n##nbc\nsacrament\naccommodated\nreiterated\nlogistical\npebbles\nthumped\n##escence\nadministering\ndecrees\ndrafts\n##flight\n##cased\n##tula\nfuturistic\npicket\nintimidation\nwinthrop\n##fahan\ninterfered\n339\nafar\nfrancoise\nmorally\nuta\ncochin\ncroft\ndwarfs\n##bruck\n##dents\n##nami\nbiker\n##hner\n##meral\nnano\n##isen\n##ometric\n##pres\n##ан\nbrightened\nmeek\nparcels\nsecurely\ngunners\n##jhl\n##zko\nagile\nhysteria\n##lten\n##rcus\nbukit\nchamps\nchevy\ncuckoo\nleith\nsadler\ntheologians\nwelded\n##section\n1663\njj\nplurality\nxander\n##rooms\n##formed\nshredded\ntemps\nintimately\npau\ntormented\n##lok\n##stellar\n1618\ncharred\nems\nessen\n##mmel\nalarms\nspraying\nascot\nblooms\ntwinkle\n##abia\n##apes\ninternment\nobsidian\n##chaft\nsnoop\n##dav\n##ooping\nmalibu\n##tension\nquiver\n##itia\nhays\nmcintosh\ntravers\nwalsall\n##ffie\n1623\nbeverley\nschwarz\nplunging\nstructurally\nm3\nrosenthal\nvikram\n##tsk\n770\nghz\n##onda\n##tiv\nchalmers\ngroningen\npew\nreckon\nunicef\n##rvis\n55th\n##gni\n1651\nsulawesi\navila\ncai\nmetaphysical\nscrewing\nturbulence\n##mberg\naugusto\nsamba\n56th\nbaffled\nmomentary\ntoxin\n##urian\n##wani\naachen\ncondoms\ndali\nsteppe\n##3d\n##app\n##oed\n##year\nadolescence\ndauphin\nelectrically\ninaccessible\nmicroscopy\nnikita\n##ega\natv\n##cel\n##enter\n##oles\n##oteric\n##ы\naccountants\npunishments\nwrongly\nbribes\nadventurous\nclinch\nflinders\nsouthland\n##hem\n##kata\ngough\n##ciency\nlads\nsoared\n##ה\nundergoes\ndeformation\noutlawed\nrubbish\n##arus\n##mussen\n##nidae\n##rz
burg\narcs\n##ingdon\n##tituted\n1695\nwheelbase\nwheeling\nbombardier\ncampground\nzebra\n##lices\n##oj\n##bain\nlullaby\n##ecure\ndonetsk\nwylie\ngrenada\n##arding\n##ης\nsquinting\neireann\nopposes\n##andra\nmaximal\nrunes\n##broken\n##cuting\n##iface\n##ror\n##rosis\nadditive\nbritney\nadultery\ntriggering\n##drome\ndetrimental\naarhus\ncontainment\njc\nswapped\nvichy\n##ioms\nmadly\n##oric\n##rag\nbrant\n##ckey\n##trix\n1560\n1612\nbroughton\nrustling\n##stems\n##uder\nasbestos\nmentoring\n##nivorous\nfinley\nleaps\n##isan\napical\npry\nslits\nsubstitutes\n##dict\nintuitive\nfantasia\ninsistent\nunreasonable\n##igen\n##vna\ndomed\nhannover\nmargot\nponder\n##zziness\nimpromptu\njian\nlc\nrampage\nstemming\n##eft\nandrey\ngerais\nwhichever\namnesia\nappropriated\nanzac\nclicks\nmodifying\nultimatum\ncambrian\nmaids\nverve\nyellowstone\n##mbs\nconservatoire\n##scribe\nadherence\ndinners\nspectra\nimperfect\nmysteriously\nsidekick\ntatar\ntuba\n##aks\n##ifolia\ndistrust\n##athan\n##zle\nc2\nronin\nzac\n##pse\ncelaena\ninstrumentalist\nscents\nskopje\n##mbling\ncomical\ncompensated\nvidal\ncondor\nintersect\njingle\nwavelengths\n##urrent\nmcqueen\n##izzly\ncarp\nweasel\n422\nkanye\nmilitias\npostdoctoral\neugen\ngunslinger\n##ɛ\nfaux\nhospice\n##for\nappalled\nderivation\ndwarves\n##elis\ndilapidated\n##folk\nastoria\nphilology\n##lwyn\n##otho\n##saka\ninducing\nphilanthropy\n##bf\n##itative\ngeek\nmarkedly\nsql\n##yce\nbessie\nindices\nrn\n##flict\n495\nfrowns\nresolving\nweightlifting\ntugs\ncleric\ncontentious\n1653\nmania\nrms\n##miya\n##reate\n##ruck\n##tucket\nbien\neels\nmarek\n##ayton\n##cence\ndiscreet\nunofficially\n##ife\nleaks\n##bber\n1705\n332\ndung\ncompressor\nhillsborough\npandit\nshillings\ndistal\n##skin\n381\n##tat\n##you\nnosed\n##nir\nmangrove\nundeveloped\n##idia\ntextures\n##inho\n##500\n##rise\nae\nirritating\nnay\namazingly\nbancroft\napologetic\ncompassionate\nkata\nsymphonies\n##lovic\nairspace\n##lch\n930\ngifford\nprecautions\nfulfillm
ent\nsevilla\nvulgar\nmartinique\n##urities\nlooting\npiccolo\ntidy\n##dermott\nquadrant\narmchair\nincomes\nmathematicians\nstampede\nnilsson\n##inking\n##scan\nfoo\nquarterfinal\n##ostal\nshang\nshouldered\nsquirrels\n##owe\n344\nvinegar\n##bner\n##rchy\n##systems\ndelaying\n##trics\nars\ndwyer\nrhapsody\nsponsoring\n##gration\nbipolar\ncinder\nstarters\n##olio\n##urst\n421\nsignage\n##nty\naground\nfigurative\nmons\nacquaintances\nduets\nerroneously\nsoyuz\nelliptic\nrecreated\n##cultural\n##quette\n##ssed\n##tma\n##zcz\nmoderator\nscares\n##itaire\n##stones\n##udence\njuniper\nsighting\n##just\n##nsen\nbritten\ncalabria\nry\nbop\ncramer\nforsyth\nstillness\n##л\nairmen\ngathers\nunfit\n##umber\n##upt\ntaunting\n##rip\nseeker\nstreamlined\n##bution\nholster\nschumann\ntread\nvox\n##gano\n##onzo\nstrive\ndil\nreforming\ncovent\nnewbury\npredicting\n##orro\ndecorate\ntre\n##puted\nandover\nie\nasahi\ndept\ndunkirk\ngills\n##tori\nburen\nhuskies\n##stis\n##stov\nabstracts\nbets\nloosen\n##opa\n1682\nyearning\n##glio\n##sir\nberman\neffortlessly\nenamel\nnapoli\npersist\n##peration\n##uez\nattache\nelisa\nb1\ninvitations\n##kic\naccelerating\nreindeer\nboardwalk\nclutches\nnelly\npolka\nstarbucks\n##kei\nadamant\nhuey\nlough\nunbroken\nadventurer\nembroidery\ninspecting\nstanza\n##ducted\nnaia\ntaluka\n##pone\n##roids\nchases\ndeprivation\nflorian\n##jing\n##ppet\nearthly\n##lib\n##ssee\ncolossal\nforeigner\nvet\nfreaks\npatrice\nrosewood\ntriassic\nupstate\n##pkins\ndominates\nata\nchants\nks\nvo\n##400\n##bley\n##raya\n##rmed\n555\nagra\ninfiltrate\n##ailing\n##ilation\n##tzer\n##uppe\n##werk\nbinoculars\nenthusiast\nfujian\nsqueak\n##avs\nabolitionist\nalmeida\nboredom\nhampstead\nmarsden\nrations\n##ands\ninflated\n334\nbonuses\nrosalie\npatna\n##rco\n329\ndetachments\npenitentiary\n54th\nflourishing\nwoolf\n##dion\n##etched\npapyrus\n##lster\n##nsor\n##toy\nbobbed\ndismounted\nendelle\ninhuman\nmotorola\ntbs\nwince\nwreath\n##ticus\nhideout\ninspections\nsanjay\
ndisgrace\ninfused\npudding\nstalks\n##urbed\narsenic\nleases\n##hyl\n##rrard\ncollarbone\n##waite\n##wil\ndowry\n##bant\n##edance\ngenealogical\nnitrate\nsalamanca\nscandals\nthyroid\nnecessitated\n##!\n##\"\n###\n##$\n##%\n##&\n##'\n##(\n##)\n##*\n##+\n##,\n##-\n##.\n##/\n##:\n##;\n##<\n##=\n##>\n##?\n##@\n##[\n##\\\n##]\n##^\n##_\n##`\n##{\n##|\n##}\n##~\n##¡\n##¢\n##£\n##¤\n##¥\n##¦\n##§\n##¨\n##©\n##ª\n##«\n##¬\n##®\n##±\n##´\n##µ\n##¶\n##·\n##º\n##»\n##¼\n##¾\n##¿\n##æ\n##ð\n##÷\n##þ\n##đ\n##ħ\n##ŋ\n##œ\n##ƒ\n##ɐ\n##ɑ\n##ɒ\n##ɔ\n##ɕ\n##ə\n##ɡ\n##ɣ\n##ɨ\n##ɪ\n##ɫ\n##ɬ\n##ɯ\n##ɲ\n##ɴ\n##ɹ\n##ɾ\n##ʀ\n##ʁ\n##ʂ\n##ʃ\n##ʉ\n##ʊ\n##ʋ\n##ʌ\n##ʎ\n##ʐ\n##ʑ\n##ʒ\n##ʔ\n##ʰ\n##ʲ\n##ʳ\n##ʷ\n##ʸ\n##ʻ\n##ʼ\n##ʾ\n##ʿ\n##ˈ\n##ˡ\n##ˢ\n##ˣ\n##ˤ\n##β\n##γ\n##δ\n##ε\n##ζ\n##θ\n##κ\n##λ\n##μ\n##ξ\n##ο\n##π\n##ρ\n##σ\n##τ\n##υ\n##φ\n##χ\n##ψ\n##ω\n##б\n##г\n##д\n##ж\n##з\n##м\n##п\n##с\n##у\n##ф\n##х\n##ц\n##ч\n##ш\n##щ\n##ъ\n##э\n##ю\n##ђ\n##є\n##і\n##ј\n##љ\n##њ\n##ћ\n##ӏ\n##ա\n##բ\n##գ\n##դ\n##ե\n##թ\n##ի\n##լ\n##կ\n##հ\n##մ\n##յ\n##ն\n##ո\n##պ\n##ս\n##վ\n##տ\n##ր\n##ւ\n##ք\n##־\n##א\n##ב\n##ג\n##ד\n##ו\n##ז\n##ח\n##ט\n##י\n##ך\n##כ\n##ל\n##ם\n##מ\n##ן\n##נ\n##ס\n##ע\n##ף\n##פ\n##ץ\n##צ\n##ק\n##ר\n##ש\n##ת\n##،\n##ء\n##ب\n##ت\n##ث\n##ج\n##ح\n##خ\n##ذ\n##ز\n##س\n##ش\n##ص\n##ض\n##ط\n##ظ\n##ع\n##غ\n##ـ\n##ف\n##ق\n##ك\n##و\n##ى\n##ٹ\n##پ\n##چ\n##ک\n##گ\n##ں\n##ھ\n##ہ\n##ے\n##अ\n##आ\n##उ\n##ए\n##क\n##ख\n##ग\n##च\n##ज\n##ट\n##ड\n##ण\n##त\n##थ\n##द\n##ध\n##न\n##प\n##ब\n##भ\n##म\n##य\n##र\n##ल\n##व\n##श\n##ष\n##स\n##ह\n##ा\n##ि\n##ी\n##ो\n##।\n##॥\n##ং\n##অ\n##আ\n##ই\n##উ\n##এ\n##ও\n##ক\n##খ\n##গ\n##চ\n##ছ\n##জ\n##ট\n##ড\n##ণ\n##ত\n##থ\n##দ\n##ধ\n##ন\n##প\n##ব\n##ভ\n##ম\n##য\n##র\n##ল\n##শ\n##ষ\n##স\n##হ\n##া\n##ি\n##ী\n##ে\n##க\n##ச\n##ட\n##த\n##ந\n##ன\n##ப\n##ம\n##ய\n##ர\n##ல\n##ள\n##வ\n##ா\n##ி\n##ு\n##ே\n##ை\n##ನ\n##ರ\n##ಾ\n##ක\n##ය\n##ර\n##ල\n##ව\n##ා\n##ก\n##ง\n##ต\n##ท\n##น\n##พ\n##ม\n##ย\n##ร\n##ล\n##ว\n##ส\n##อ\n##า\n##เ\n##་\n##།\n##ག\n##ང\n##ད\n##ན\n##པ\n##བ\n##མ\n##འ\n##ར\n##ལ\
n##ས\n##မ\n##ა\n##ბ\n##გ\n##დ\n##ე\n##ვ\n##თ\n##ი\n##კ\n##ლ\n##მ\n##ნ\n##ო\n##რ\n##ს\n##ტ\n##უ\n##ᄀ\n##ᄂ\n##ᄃ\n##ᄅ\n##ᄆ\n##ᄇ\n##ᄉ\n##ᄊ\n##ᄋ\n##ᄌ\n##ᄎ\n##ᄏ\n##ᄐ\n##ᄑ\n##ᄒ\n##ᅡ\n##ᅢ\n##ᅥ\n##ᅦ\n##ᅧ\n##ᅩ\n##ᅪ\n##ᅭ\n##ᅮ\n##ᅯ\n##ᅲ\n##ᅳ\n##ᅴ\n##ᅵ\n##ᆨ\n##ᆫ\n##ᆯ\n##ᆷ\n##ᆸ\n##ᆼ\n##ᴬ\n##ᴮ\n##ᴰ\n##ᴵ\n##ᴺ\n##ᵀ\n##ᵃ\n##ᵇ\n##ᵈ\n##ᵉ\n##ᵍ\n##ᵏ\n##ᵐ\n##ᵒ\n##ᵖ\n##ᵗ\n##ᵘ\n##ᵣ\n##ᵤ\n##ᵥ\n##ᶜ\n##ᶠ\n##‐\n##‑\n##‒\n##–\n##—\n##―\n##‖\n##‘\n##’\n##‚\n##“\n##”\n##„\n##†\n##‡\n##•\n##…\n##‰\n##′\n##″\n##›\n##‿\n##⁄\n##⁰\n##ⁱ\n##⁴\n##⁵\n##⁶\n##⁷\n##⁸\n##⁹\n##⁻\n##ⁿ\n##₅\n##₆\n##₇\n##₈\n##₉\n##₊\n##₍\n##₎\n##ₐ\n##ₑ\n##ₒ\n##ₓ\n##ₕ\n##ₖ\n##ₗ\n##ₘ\n##ₚ\n##ₛ\n##ₜ\n##₤\n##₩\n##€\n##₱\n##₹\n##ℓ\n##№\n##ℝ\n##™\n##⅓\n##⅔\n##←\n##↑\n##→\n##↓\n##↔\n##↦\n##⇄\n##⇌\n##⇒\n##∂\n##∅\n##∆\n##∇\n##∈\n##∗\n##∘\n##√\n##∞\n##∧\n##∨\n##∩\n##∪\n##≈\n##≡\n##≤\n##≥\n##⊂\n##⊆\n##⊕\n##⊗\n##⋅\n##─\n##│\n##■\n##▪\n##●\n##★\n##☆\n##☉\n##♠\n##♣\n##♥\n##♦\n##♯\n##⟨\n##⟩\n##ⱼ\n##⺩\n##⺼\n##⽥\n##、\n##。\n##〈\n##〉\n##《\n##》\n##「\n##」\n##『\n##』\n##〜\n##あ\n##い\n##う\n##え\n##お\n##か\n##き\n##く\n##け\n##こ\n##さ\n##し\n##す\n##せ\n##そ\n##た\n##ち\n##っ\n##つ\n##て\n##と\n##な\n##に\n##ぬ\n##ね\n##の\n##は\n##ひ\n##ふ\n##へ\n##ほ\n##ま\n##み\n##む\n##め\n##も\n##や\n##ゆ\n##よ\n##ら\n##り\n##る\n##れ\n##ろ\n##を\n##ん\n##ァ\n##ア\n##ィ\n##イ\n##ウ\n##ェ\n##エ\n##オ\n##カ\n##キ\n##ク\n##ケ\n##コ\n##サ\n##シ\n##ス\n##セ\n##タ\n##チ\n##ッ\n##ツ\n##テ\n##ト\n##ナ\n##ニ\n##ノ\n##ハ\n##ヒ\n##フ\n##ヘ\n##ホ\n##マ\n##ミ\n##ム\n##メ\n##モ\n##ャ\n##ュ\n##ョ\n##ラ\n##リ\n##ル\n##レ\n##ロ\n##ワ\n##ン\n##・\n##ー\n##一\n##三\n##上\n##下\n##不\n##世\n##中\n##主\n##久\n##之\n##也\n##事\n##二\n##五\n##井\n##京\n##人\n##亻\n##仁\n##介\n##代\n##仮\n##伊\n##会\n##佐\n##侍\n##保\n##信\n##健\n##元\n##光\n##八\n##公\n##内\n##出\n##分\n##前\n##劉\n##力\n##加\n##勝\n##北\n##区\n##十\n##千\n##南\n##博\n##原\n##口\n##古\n##史\n##司\n##合\n##吉\n##同\n##名\n##和\n##囗\n##四\n##国\n##國\n##土\n##地\n##坂\n##城\n##堂\n##場\n##士\n##夏\n##外\n##大\n##天\n##太\n##夫\n##奈\n##女\n##子\n##学\n##宀\n##宇\n##安\n##宗\n##定\n##宣\n##宮\n##家\n##宿\n##寺\n##將\n##小\n##尚\n##山\n##岡\n##島\n##崎\n##川\n##州\n##巿\n##帝\n##平\n##年\n##幸\n##广\n##弘\n##張\n##彳\
n##後\n##御\n##德\n##心\n##忄\n##志\n##忠\n##愛\n##成\n##我\n##戦\n##戸\n##手\n##扌\n##政\n##文\n##新\n##方\n##日\n##明\n##星\n##春\n##昭\n##智\n##曲\n##書\n##月\n##有\n##朝\n##木\n##本\n##李\n##村\n##東\n##松\n##林\n##森\n##楊\n##樹\n##橋\n##歌\n##止\n##正\n##武\n##比\n##氏\n##民\n##水\n##氵\n##氷\n##永\n##江\n##沢\n##河\n##治\n##法\n##海\n##清\n##漢\n##瀬\n##火\n##版\n##犬\n##王\n##生\n##田\n##男\n##疒\n##発\n##白\n##的\n##皇\n##目\n##相\n##省\n##真\n##石\n##示\n##社\n##神\n##福\n##禾\n##秀\n##秋\n##空\n##立\n##章\n##竹\n##糹\n##美\n##義\n##耳\n##良\n##艹\n##花\n##英\n##華\n##葉\n##藤\n##行\n##街\n##西\n##見\n##訁\n##語\n##谷\n##貝\n##貴\n##車\n##軍\n##辶\n##道\n##郎\n##郡\n##部\n##都\n##里\n##野\n##金\n##鈴\n##镇\n##長\n##門\n##間\n##阝\n##阿\n##陳\n##陽\n##雄\n##青\n##面\n##風\n##食\n##香\n##馬\n##高\n##龍\n##龸\n##ﬁ\n##ﬂ\n##！\n##（\n##）\n##，\n##－\n##．\n##／\n##：\n##？\n##～\n"
  }
]