[
  {
    "path": "README.md",
    "content": "# exploring-T5\nA repo to explore different NLP tasks which can be solved using T5\n"
  },
  {
    "path": "T5_on_TPU.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"name\": \"T5 on TPU\",\n      \"provenance\": [],\n      \"collapsed_sections\": [],\n      \"machine_shape\": \"hm\",\n      \"authorship_tag\": \"ABX9TyNhznFJcPoY54Wwqs6aEdRJ\",\n      \"include_colab_link\": true\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    },\n    \"accelerator\": \"TPU\",\n    \"widgets\": {\n      \"application/vnd.jupyter.widget-state+json\": {\n        \"44ca82b3bbc5432eadc4f6fa3b81f483\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_9a58f575a463454aba3a52742abf00bf\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_6217a8ed0c634a5bbcbb4070d61d2ce3\",\n              \"IPY_MODEL_9542805d664d451aa33a5bd7cc36d614\"\n            ]\n          }\n        },\n        \"9a58f575a463454aba3a52742abf00bf\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            
\"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"6217a8ed0c634a5bbcbb4070d61d2ce3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_f90e54ee9dee42a99f38fdf2186cea62\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 791656,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 791656,\n            \"_view_count\": null,\n            
\"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_2085a86f64a04a1586fefa0fe1413406\"\n          }\n        },\n        \"9542805d664d451aa33a5bd7cc36d614\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_8e0c79e36d8c4d1faab99cc565feeb17\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 792k/792k [00:06&lt;00:00, 124kB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_413ae83fdf9d4573b2c2dc053db83aa2\"\n          }\n        },\n        \"f90e54ee9dee42a99f38fdf2186cea62\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"2085a86f64a04a1586fefa0fe1413406\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          
\"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"8e0c79e36d8c4d1faab99cc565feeb17\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            
\"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"413ae83fdf9d4573b2c2dc053db83aa2\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": 
null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"d7fa301d6f7d498bac466849f04321c5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_0f662bc0fa4444f386ec8975b4289f04\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_a2b08cf2bb834808b02d20fbee2f00ab\",\n              \"IPY_MODEL_5efd16badcb64aa3a708975504feade1\"\n            ]\n          }\n        },\n        \"0f662bc0fa4444f386ec8975b4289f04\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n 
           \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"a2b08cf2bb834808b02d20fbee2f00ab\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_a763d584af9147bcbe5430f95bf16925\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 4997,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 4997,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_329f27a50bee452b8f5ee719929fb33b\"\n          }\n        },\n        
\"5efd16badcb64aa3a708975504feade1\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_0be03081b1324f79a99bdbe926c30e89\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 5.00k/5.00k [00:00&lt;00:00, 11.7kB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_dca7ff44a73a4c9f872cb2596c3c729e\"\n          }\n        },\n        \"a763d584af9147bcbe5430f95bf16925\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"329f27a50bee452b8f5ee719929fb33b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            
\"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"0be03081b1324f79a99bdbe926c30e89\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": 
\"@jupyter-widgets/controls\"\n          }\n        },\n        \"dca7ff44a73a4c9f872cb2596c3c729e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"d530b763a0e9415f9b8b1f1787a66619\": {\n          
\"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_7b85b787b5d9490c994f9e406835813b\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_f86c72c0da864856b9ddb55568fe4a2b\",\n              \"IPY_MODEL_a4e0f5b0b3cf4091a44bad74e5abb62e\"\n            ]\n          }\n        },\n        \"7b85b787b5d9490c994f9e406835813b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": 
null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"f86c72c0da864856b9ddb55568fe4a2b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_3ecc3e6a31db4835987c22b81a85c23f\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 2240,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 2240,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_7c062b51acab4bb59eeb4269d5e6c2df\"\n          }\n        },\n        \"a4e0f5b0b3cf4091a44bad74e5abb62e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_7fbf906e11d24bfdb90e3f0ce46250c1\",\n            
\"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 2.24k/2.24k [00:02&lt;00:00, 1.02kB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_43deb763aee24506a82dd795cd126f49\"\n          }\n        },\n        \"3ecc3e6a31db4835987c22b81a85c23f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"7c062b51acab4bb59eeb4269d5e6c2df\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": 
\"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"7fbf906e11d24bfdb90e3f0ce46250c1\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"43deb763aee24506a82dd795cd126f49\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": 
null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"6af39625f78e4e5989739eff7e8849f6\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            
\"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_d97ac4c36e7944fdb989cf7ee1851350\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_2539f0244f0e468a993609fd562ff1f5\",\n              \"IPY_MODEL_17025b38dedb45a39a95ffe7b6117d87\"\n            ]\n          }\n        },\n        \"d97ac4c36e7944fdb989cf7ee1851350\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": 
null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"2539f0244f0e468a993609fd562ff1f5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_b6dde636eb51433fb5be7af2cedc2774\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: \",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 8116577,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 8116577,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_ff877f980dd34d2d9f3a4d3efb50af0c\"\n          }\n        },\n        \"17025b38dedb45a39a95ffe7b6117d87\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_cde27b62215448be92d8c8fa2292438a\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 30.3M/? 
[00:00&lt;00:00, 41.3MB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_5b66084b700145d5903aa91bcd2920f7\"\n          }\n        },\n        \"b6dde636eb51433fb5be7af2cedc2774\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"ff877f980dd34d2d9f3a4d3efb50af0c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            
\"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"cde27b62215448be92d8c8fa2292438a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"5b66084b700145d5903aa91bcd2920f7\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n      
      \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"741a693169b54851a4cb47369dd9bd1e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_eb0f02133b9140be92288fd6a8415deb\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            
\"children\": [\n              \"IPY_MODEL_03a75d6d64524d47a3df6457e1901cbd\",\n              \"IPY_MODEL_b4a2213a0a0b421d895269733344cc9d\"\n            ]\n          }\n        },\n        \"eb0f02133b9140be92288fd6a8415deb\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n   
         \"left\": null\n          }\n        },\n        \"03a75d6d64524d47a3df6457e1901cbd\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_64e947e3d9e440cf9f3880cb2325e57a\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: \",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 1054280,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1054280,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_108bbc9991614357aae4469fc19b4859\"\n          }\n        },\n        \"b4a2213a0a0b421d895269733344cc9d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_6792931cb55944deb260f07d6bf87931\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 4.85M/? 
[00:00&lt;00:00, 15.2MB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_3cb18f41264740c9933f159a9820ad77\"\n          }\n        },\n        \"64e947e3d9e440cf9f3880cb2325e57a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"108bbc9991614357aae4469fc19b4859\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            
\"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"6792931cb55944deb260f07d6bf87931\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"3cb18f41264740c9933f159a9820ad77\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n      
      \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"004faf88da144ad2bd3e50b5b9766627\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_f02e5f34ac414081ab2882b2ab7ff64d\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            
\"children\": [\n              \"IPY_MODEL_41c3aac64e424e1db59b723a11bb0fb0\",\n              \"IPY_MODEL_44540a280fb141979bdeff7f93d0cde2\"\n            ]\n          }\n        },\n        \"f02e5f34ac414081ab2882b2ab7ff64d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n   
         \"left\": null\n          }\n        },\n        \"41c3aac64e424e1db59b723a11bb0fb0\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_f9c604fa9a8642eaa218824b8ab651fe\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"info\",\n            \"max\": 1,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_34e5dacca6fc4497abda0a2ad37996e0\"\n          }\n        },\n        \"44540a280fb141979bdeff7f93d0cde2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_8bd88e5bf56e45bbbc1d339b00eff57c\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 87599/0 [00:03&lt;00:00, 5389.75 examples/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_93b33f1c33de44b4a1e57fb4133343c9\"\n          }\n        },\n        
\"f9c604fa9a8642eaa218824b8ab651fe\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"34e5dacca6fc4497abda0a2ad37996e0\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            
\"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"8bd88e5bf56e45bbbc1d339b00eff57c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"93b33f1c33de44b4a1e57fb4133343c9\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n       
     \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"3ec93bdea33b40ba82e24b4a0e605bc0\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_121c3099d6e04079a4f6bf5707e0c60d\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_4f17790e2f594811bbfe55c9d883cf41\",\n              \"IPY_MODEL_602e1a740e8f41fb800dbb3af0ea04f9\"\n            ]\n          }\n        },\n        \"121c3099d6e04079a4f6bf5707e0c60d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": 
\"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"4f17790e2f594811bbfe55c9d883cf41\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": 
\"IPY_MODEL_c1628c0657a44cf0873f2d64c9fb549c\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"info\",\n            \"max\": 1,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_99f6bf35ae7f4e66b4d339fe93768c7c\"\n          }\n        },\n        \"602e1a740e8f41fb800dbb3af0ea04f9\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_d73cf402af074a5481bf900e6abe2c63\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 10570/0 [00:00&lt;00:00, 18381.93 examples/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_0b2c415901d8468fb3f9acb63dd2f4e3\"\n          }\n        },\n        \"c1628c0657a44cf0873f2d64c9fb549c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n         
   \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"99f6bf35ae7f4e66b4d339fe93768c7c\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            
\"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"d73cf402af074a5481bf900e6abe2c63\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"0b2c415901d8468fb3f9acb63dd2f4e3\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n        
    \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"1d197b0946534a91b2eb4595baade174\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_55c18e9169ab44c981de4cb2ebaea578\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_21065bd4f9fa4d97a20a25c9a35b7737\",\n              \"IPY_MODEL_88d28cee2f02428aa3c4c22a873d6385\"\n            ]\n          }\n        },\n        \"55c18e9169ab44c981de4cb2ebaea578\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n     
       \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"21065bd4f9fa4d97a20a25c9a35b7737\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_12d2d5b9a3f54b25aa8fef6439625db6\",\n            \"_dom_classes\": [],\n            \"description\": \"Epoch: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 4,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": 
\"1.5.0\",\n            \"value\": 4,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_32f70451efa6473a8a00807dd22a3248\"\n          }\n        },\n        \"88d28cee2f02428aa3c4c22a873d6385\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_17c358e37fc346a9ac8ce475599e495a\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 4/4 [20:24&lt;00:00, 306.06s/it]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_947afd4c522443b38bc223114178c7ee\"\n          }\n        },\n        \"12d2d5b9a3f54b25aa8fef6439625db6\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        
\"32f70451efa6473a8a00807dd22a3248\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"17c358e37fc346a9ac8ce475599e495a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": 
\"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"947afd4c522443b38bc223114178c7ee\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            
\"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"a558cfceca8b43568dfad2267a534a35\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_7a5270ddb66447af9fba298e830fe8a4\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_d961f9fa40a04ceda3e1962eef4d33cc\",\n              \"IPY_MODEL_d9363dcd670643e6ad2d0b65c4238b98\"\n            ]\n          }\n        },\n        \"7a5270ddb66447af9fba298e830fe8a4\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n  
          \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"d961f9fa40a04ceda3e1962eef4d33cc\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_b25b3d9394be4ac786678181c800e926\",\n            \"_dom_classes\": [],\n            \"description\": \"Iteration: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 1369,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1369,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n       
     \"layout\": \"IPY_MODEL_3fed14bdec8b4162887d77dea7a9f289\"\n          }\n        },\n        \"d9363dcd670643e6ad2d0b65c4238b98\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_5051978cf84f4f8191c1e3ad4661776e\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 1369/1369 [20:23&lt;00:00,  1.12it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_f67b49a0d48f4714a124bc972e9afa0f\"\n          }\n        },\n        \"b25b3d9394be4ac786678181c800e926\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"3fed14bdec8b4162887d77dea7a9f289\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            
\"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"5051978cf84f4f8191c1e3ad4661776e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": 
null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"f67b49a0d48f4714a124bc972e9afa0f\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          
}\n        },\n        \"2c4d7bdf7f6c48f0825ef87a12399f39\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_391fc0937c3b48f8b4c3b33a1c3849a0\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_11c6ff7dfb19436c920c5114734c0ec9\",\n              \"IPY_MODEL_3b3722a55e3d43bc93c29a505895a25c\"\n            ]\n          }\n        },\n        \"391fc0937c3b48f8b4c3b33a1c3849a0\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": 
null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"11c6ff7dfb19436c920c5114734c0ec9\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_f46743ac2da84d6ca77180d8686ead6b\",\n            \"_dom_classes\": [],\n            \"description\": \"Iteration:   0%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"danger\",\n            \"max\": 1369,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 0,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_1b0e2b414d6843ae9c918db417ffff4d\"\n          }\n        },\n        \"3b3722a55e3d43bc93c29a505895a25c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": 
\"IPY_MODEL_e7559a6907f7479db7764f80c4501993\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 0/1369 [00:00&lt;?, ?it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_5f89a141b34d478e861ea6bd4a17dcd2\"\n          }\n        },\n        \"f46743ac2da84d6ca77180d8686ead6b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"1b0e2b414d6843ae9c918db417ffff4d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            
\"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"e7559a6907f7479db7764f80c4501993\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"5f89a141b34d478e861ea6bd4a17dcd2\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": 
\"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"a9a7aeae6fa74b67a349a69e845f8898\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": 
\"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_0a9d6c739f984b91a0e17724707eb7e1\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_1ed37d3716374294b072524937625965\",\n              \"IPY_MODEL_4b44de48aeeb4b8dbc9388fc7f7da227\"\n            ]\n          }\n        },\n        \"0a9d6c739f984b91a0e17724707eb7e1\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            
\"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"1ed37d3716374294b072524937625965\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_051052ef94d447a4b0233225d8f5ae81\",\n            \"_dom_classes\": [],\n            \"description\": \"Iteration:   0%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"danger\",\n            \"max\": 1369,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 0,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_3e43d0668e6f461c9cc461aa735a4437\"\n          }\n        },\n        \"4b44de48aeeb4b8dbc9388fc7f7da227\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_43e265f386ca4dfd88e226a87f32775c\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            
\"value\": \" 0/1369 [00:00&lt;?, ?it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_6568a6e42f734c29ad5c26f2397a5a69\"\n          }\n        },\n        \"051052ef94d447a4b0233225d8f5ae81\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"3e43d0668e6f461c9cc461aa735a4437\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n 
           \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"43e265f386ca4dfd88e226a87f32775c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"6568a6e42f734c29ad5c26f2397a5a69\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": 
null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"c227333162114432a38b1480ec837701\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_71a3e5b846a74f409dfd029ea3bf5d61\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n 
           \"children\": [\n              \"IPY_MODEL_8a27b38cd17649b1bc720a33050dc520\",\n              \"IPY_MODEL_16497f2b4de443bb9bb14ccaf0c94803\"\n            ]\n          }\n        },\n        \"71a3e5b846a74f409dfd029ea3bf5d61\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            
\"display\": null,\n            \"left\": null\n          }\n        },\n        \"8a27b38cd17649b1bc720a33050dc520\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_f93601a0c6d84d8b9324fa2380e14d25\",\n            \"_dom_classes\": [],\n            \"description\": \"Iteration:   0%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"danger\",\n            \"max\": 1369,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 0,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_f30f39adee694c55bcbaa850cf031b69\"\n          }\n        },\n        \"16497f2b4de443bb9bb14ccaf0c94803\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_ea4e4964058945f287eb2a6271b5bae7\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 0/1369 [00:00&lt;?, ?it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_1e74937c1f304d15ad49230b2428590d\"\n          }\n        },\n        
\"f93601a0c6d84d8b9324fa2380e14d25\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"f30f39adee694c55bcbaa850cf031b69\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            
\"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"ea4e4964058945f287eb2a6271b5bae7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"1e74937c1f304d15ad49230b2428590d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n       
     \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"df609e7f302e48949a49fae1b0deeb54\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_be907685a2b949d0a088a907a2cb2a90\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_743bba68cbe943d78ce63a1efd70d929\",\n              \"IPY_MODEL_f3fffc31462b49e193b3c24d600b4ddb\"\n            ]\n          }\n        },\n        \"be907685a2b949d0a088a907a2cb2a90\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": 
\"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"743bba68cbe943d78ce63a1efd70d929\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": 
\"IPY_MODEL_87fa58101b24400da90e4b0f87c7cb1a\",\n            \"_dom_classes\": [],\n            \"description\": \"100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 331,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 331,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_3c39d97054ba47a3ad3d9bb9a882756e\"\n          }\n        },\n        \"f3fffc31462b49e193b3c24d600b4ddb\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_20855b3cb56944cfb0d6aa13bd06429a\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 331/331 [1:31:09&lt;00:00, 16.52s/it]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_779d80cc14104171ab40ff53170e4807\"\n          }\n        },\n        \"87fa58101b24400da90e4b0f87c7cb1a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n      
      \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"3c39d97054ba47a3ad3d9bb9a882756e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            
\"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"20855b3cb56944cfb0d6aa13bd06429a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"779d80cc14104171ab40ff53170e4807\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n        
    \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        }\n      }\n    }\n  },\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"view-in-github\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"<a href=\\\"https://colab.research.google.com/github/patil-suraj/exploring-T5/blob/master/T5_on_TPU.ipynb\\\" target=\\\"_parent\\\"><img src=\\\"https://colab.research.google.com/assets/colab-badge.svg\\\" alt=\\\"Open In Colab\\\"/></a>\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"_6q2San5Xh5N\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"# T5 on TPU 💥🚀\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"knpacarPX2AN\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"In this notebook we will see how to train T5 model on TPU with Huggingface's awesome new [trainer](https://github.com/huggingface/transformers/blob/master/src/transformers/trainer.py). We will train T5 base model on SQUAD dataset for QA task. 
We will use the recently released amazing [nlp](https://github.com/huggingface/nlp) package to load and process the dataset in just few lines.\\n\",\n        \"\\n\",\n        \"First make sure you are connected to the high RAM instance. This will not work on 12 GB colab instance.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"QLGiFCDqvuil\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"# Crash on purpose to get more ram :\\n\",\n        \"import torch\\n\",\n        \"torch.tensor([10.]*10000000000)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"40CJSrN9ZiIP\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Let's install [PyTorch/XLA](https://github.com/pytorch/xla) which enables PyTorch on TPU. Make sure you install the nightly version, as the trainer breaks on other versions.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"coOmS2s_xDBy\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"6ed0947e-4061-4ba7-9eca-72e7ed935bd6\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 907\n        }\n      },\n      \"source\": [\n        \"VERSION = \\\"nightly\\\"  #@param [\\\"1.5\\\" , \\\"20200325\\\", \\\"nightly\\\"]\\n\",\n        \"!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py\\n\",\n        \"!python pytorch-xla-env-setup.py --version $VERSION\"\n      ],\n      \"execution_count\": 5,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\\n\",\n            \"                                 Dload  Upload 
  Total   Spent    Left  Speed\\n\",\n            \"\\r  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0\\r100  4264  100  4264    0     0  60914      0 --:--:-- --:--:-- --:--:-- 60914\\n\",\n            \"Updating TPU and VM. This may take around 2 minutes.\\n\",\n            \"Updating TPU runtime to pytorch-nightly ...\\n\",\n            \"Uninstalling torch-1.5.0+cu101:\\n\",\n            \"Done updating TPU runtime: <Response [200]>\\n\",\n            \"  Successfully uninstalled torch-1.5.0+cu101\\n\",\n            \"Uninstalling torchvision-0.6.0+cu101:\\n\",\n            \"  Successfully uninstalled torchvision-0.6.0+cu101\\n\",\n            \"Copying gs://tpu-pytorch/wheels/torch-nightly-cp36-cp36m-linux_x86_64.whl...\\n\",\n            \"- [1 files][ 91.1 MiB/ 91.1 MiB]                                                \\n\",\n            \"Operation completed over 1 objects/91.1 MiB.                                     \\n\",\n            \"Copying gs://tpu-pytorch/wheels/torch_xla-nightly-cp36-cp36m-linux_x86_64.whl...\\n\",\n            \"- [1 files][119.8 MiB/119.8 MiB]                                                \\n\",\n            \"Operation completed over 1 objects/119.8 MiB.                                    \\n\",\n            \"Copying gs://tpu-pytorch/wheels/torchvision-nightly-cp36-cp36m-linux_x86_64.whl...\\n\",\n            \"/ [1 files][  2.3 MiB/  2.3 MiB]                                                \\n\",\n            \"Operation completed over 1 objects/2.3 MiB.                                      
\\n\",\n            \"Processing ./torch-nightly-cp36-cp36m-linux_x86_64.whl\\n\",\n            \"Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torch==nightly) (1.18.4)\\n\",\n            \"Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch==nightly) (0.16.0)\\n\",\n            \"\\u001b[31mERROR: fastai 1.0.61 requires torchvision, which is not installed.\\u001b[0m\\n\",\n            \"Installing collected packages: torch\\n\",\n            \"Successfully installed torch-1.6.0a0+83df3be\\n\",\n            \"Processing ./torch_xla-nightly-cp36-cp36m-linux_x86_64.whl\\n\",\n            \"Installing collected packages: torch-xla\\n\",\n            \"Successfully installed torch-xla-1.6+2191422\\n\",\n            \"Processing ./torchvision-nightly-cp36-cp36m-linux_x86_64.whl\\n\",\n            \"Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from torchvision==nightly) (7.0.0)\\n\",\n            \"Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (from torchvision==nightly) (1.6.0a0+83df3be)\\n\",\n            \"Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torchvision==nightly) (1.18.4)\\n\",\n            \"Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch->torchvision==nightly) (0.16.0)\\n\",\n            \"Installing collected packages: torchvision\\n\",\n            \"Successfully installed torchvision-0.7.0a0+348dd5a\\n\",\n            \"Reading package lists... Done\\n\",\n            \"Building dependency tree       \\n\",\n            \"Reading state information... 
Done\\n\",\n            \"The following NEW packages will be installed:\\n\",\n            \"  libomp5\\n\",\n            \"0 upgraded, 1 newly installed, 0 to remove and 31 not upgraded.\\n\",\n            \"Need to get 234 kB of archives.\\n\",\n            \"After this operation, 774 kB of additional disk space will be used.\\n\",\n            \"Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libomp5 amd64 5.0.1-1 [234 kB]\\n\",\n            \"Fetched 234 kB in 1s (371 kB/s)\\n\",\n            \"Selecting previously unselected package libomp5:amd64.\\n\",\n            \"(Reading database ... 144433 files and directories currently installed.)\\n\",\n            \"Preparing to unpack .../libomp5_5.0.1-1_amd64.deb ...\\n\",\n            \"Unpacking libomp5:amd64 (5.0.1-1) ...\\n\",\n            \"Setting up libomp5:amd64 (5.0.1-1) ...\\n\",\n            \"Processing triggers for libc-bin (2.27-3ubuntu1) ...\\n\",\n            \"/sbin/ldconfig.real: /usr/local/lib/python3.6/dist-packages/ideep4py/lib/libmkldnn.so.0 is not a symbolic link\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"6-p80vyFZ-S8\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Install transformers and the nlp package. 
Restart colab after this\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"ptPupnLsfkMH\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"94ed3f2e-a762-4969-cc6f-34382fc779ec\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 1000\n        }\n      },\n      \"source\": [\n        \"!git clone https://github.com/huggingface/transformers.git\\n\",\n        \"!pip install ./transformers\\n\",\n        \"!pip install -U nlp\"\n      ],\n      \"execution_count\": 6,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Cloning into 'transformers'...\\n\",\n            \"remote: Enumerating objects: 94, done.\\u001b[K\\n\",\n            \"remote: Counting objects:   1% (1/94)\\u001b[K\\rremote: Counting objects:   2% (2/94)\\u001b[K\\rremote: Counting objects:   3% (3/94)\\u001b[K\\rremote: Counting objects:   4% (4/94)\\u001b[K\\rremote: Counting objects:   5% (5/94)\\u001b[K\\rremote: Counting objects:   6% (6/94)\\u001b[K\\rremote: Counting objects:   7% (7/94)\\u001b[K\\rremote: Counting objects:   8% (8/94)\\u001b[K\\rremote: Counting objects:   9% (9/94)\\u001b[K\\rremote: Counting objects:  10% (10/94)\\u001b[K\\rremote: Counting objects:  11% (11/94)\\u001b[K\\rremote: Counting objects:  12% (12/94)\\u001b[K\\rremote: Counting objects:  13% (13/94)\\u001b[K\\rremote: Counting objects:  14% (14/94)\\u001b[K\\rremote: Counting objects:  15% (15/94)\\u001b[K\\rremote: Counting objects:  17% (16/94)\\u001b[K\\rremote: Counting objects:  18% (17/94)\\u001b[K\\rremote: Counting objects:  19% (18/94)\\u001b[K\\rremote: Counting objects:  20% (19/94)\\u001b[K\\rremote: Counting objects:  21% (20/94)\\u001b[K\\rremote: Counting objects:  22% (21/94)\\u001b[K\\rremote: Counting objects:  23% (22/94)\\u001b[K\\rremote: Counting objects:  24% (23/94)\\u001b[K\\rremote: Counting objects:  25% 
(24/94)\\u001b[K\\rremote: Counting objects:  26% (25/94)\\u001b[K\\rremote: Counting objects:  27% (26/94)\\u001b[K\\rremote: Counting objects:  28% (27/94)\\u001b[K\\rremote: Counting objects:  29% (28/94)\\u001b[K\\rremote: Counting objects:  30% (29/94)\\u001b[K\\rremote: Counting objects:  31% (30/94)\\u001b[K\\rremote: Counting objects:  32% (31/94)\\u001b[K\\rremote: Counting objects:  34% (32/94)\\u001b[K\\rremote: Counting objects:  35% (33/94)\\u001b[K\\rremote: Counting objects:  36% (34/94)\\u001b[K\\rremote: Counting objects:  37% (35/94)\\u001b[K\\rremote: Counting objects:  38% (36/94)\\u001b[K\\rremote: Counting objects:  39% (37/94)\\u001b[K\\rremote: Counting objects:  40% (38/94)\\u001b[K\\rremote: Counting objects:  41% (39/94)\\u001b[K\\rremote: Counting objects:  42% (40/94)\\u001b[K\\rremote: Counting objects:  43% (41/94)\\u001b[K\\rremote: Counting objects:  44% (42/94)\\u001b[K\\rremote: Counting objects:  45% (43/94)\\u001b[K\\rremote: Counting objects:  46% (44/94)\\u001b[K\\rremote: Counting objects:  47% (45/94)\\u001b[K\\rremote: Counting objects:  48% (46/94)\\u001b[K\\rremote: Counting objects:  50% (47/94)\\u001b[K\\rremote: Counting objects:  51% (48/94)\\u001b[K\\rremote: Counting objects:  52% (49/94)\\u001b[K\\rremote: Counting objects:  53% (50/94)\\u001b[K\\rremote: Counting objects:  54% (51/94)\\u001b[K\\rremote: Counting objects:  55% (52/94)\\u001b[K\\rremote: Counting objects:  56% (53/94)\\u001b[K\\rremote: Counting objects:  57% (54/94)\\u001b[K\\rremote: Counting objects:  58% (55/94)\\u001b[K\\rremote: Counting objects:  59% (56/94)\\u001b[K\\rremote: Counting objects:  60% (57/94)\\u001b[K\\rremote: Counting objects:  61% (58/94)\\u001b[K\\rremote: Counting objects:  62% (59/94)\\u001b[K\\rremote: Counting objects:  63% (60/94)\\u001b[K\\rremote: Counting objects:  64% (61/94)\\u001b[K\\rremote: Counting objects:  65% (62/94)\\u001b[K\\rremote: Counting objects:  67% (63/94)\\u001b[K\\rremote: Counting objects:  68% 
(64/94)\\u001b[K\\rremote: Counting objects:  69% (65/94)\\u001b[K\\rremote: Counting objects:  70% (66/94)\\u001b[K\\rremote: Counting objects:  71% (67/94)\\u001b[K\\rremote: Counting objects:  72% (68/94)\\u001b[K\\rremote: Counting objects:  73% (69/94)\\u001b[K\\rremote: Counting objects:  74% (70/94)\\u001b[K\\rremote: Counting objects:  75% (71/94)\\u001b[K\\rremote: Counting objects:  76% (72/94)\\u001b[K\\rremote: Counting objects:  77% (73/94)\\u001b[K\\rremote: Counting objects:  78% (74/94)\\u001b[K\\rremote: Counting objects:  79% (75/94)\\u001b[K\\rremote: Counting objects:  80% (76/94)\\u001b[K\\rremote: Counting objects:  81% (77/94)\\u001b[K\\rremote: Counting objects:  82% (78/94)\\u001b[K\\rremote: Counting objects:  84% (79/94)\\u001b[K\\rremote: Counting objects:  85% (80/94)\\u001b[K\\rremote: Counting objects:  86% (81/94)\\u001b[K\\rremote: Counting objects:  87% (82/94)\\u001b[K\\rremote: Counting objects:  88% (83/94)\\u001b[K\\rremote: Counting objects:  89% (84/94)\\u001b[K\\rremote: Counting objects:  90% (85/94)\\u001b[K\\rremote: Counting objects:  91% (86/94)\\u001b[K\\rremote: Counting objects:  92% (87/94)\\u001b[K\\rremote: Counting objects:  93% (88/94)\\u001b[K\\rremote: Counting objects:  94% (89/94)\\u001b[K\\rremote: Counting objects:  95% (90/94)\\u001b[K\\rremote: Counting objects:  96% (91/94)\\u001b[K\\rremote: Counting objects:  97% (92/94)\\u001b[K\\rremote: Counting objects:  98% (93/94)\\u001b[K\\rremote: Counting objects: 100% (94/94)\\u001b[K\\rremote: Counting objects: 100% (94/94), done.\\u001b[K\\n\",\n            \"remote: Compressing objects: 100% (46/46), done.\\u001b[K\\n\",\n            \"remote: Total 26447 (delta 52), reused 66 (delta 32), pack-reused 26353\\u001b[K\\n\",\n            \"Receiving objects: 100% (26447/26447), 15.95 MiB | 29.37 MiB/s, done.\\n\",\n            \"Resolving deltas: 100% (18423/18423), done.\\n\",\n            \"Processing ./transformers\\n\",\n            \"Requirement already 
satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers==2.9.1) (1.18.4)\\n\",\n            \"Collecting tokenizers==0.7.0\\n\",\n            \"\\u001b[?25l  Downloading https://files.pythonhosted.org/packages/14/e5/a26eb4716523808bb0a799fcfdceb6ebf77a18169d9591b2f46a9adb87d9/tokenizers-0.7.0-cp36-cp36m-manylinux1_x86_64.whl (3.8MB)\\n\",\n            \"\\u001b[K     |████████████████████████████████| 3.8MB 3.5MB/s \\n\",\n            \"\\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers==2.9.1) (3.0.12)\\n\",\n            \"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers==2.9.1) (2.23.0)\\n\",\n            \"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers==2.9.1) (4.41.1)\\n\",\n            \"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers==2.9.1) (2019.12.20)\\n\",\n            \"Collecting sentencepiece\\n\",\n            \"\\u001b[?25l  Downloading https://files.pythonhosted.org/packages/3b/88/49e772d686088e1278766ad68a463513642a2a877487decbd691dec02955/sentencepiece-0.1.90-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)\\n\",\n            \"\\u001b[K     |████████████████████████████████| 1.1MB 32.5MB/s \\n\",\n            \"\\u001b[?25hCollecting sacremoses\\n\",\n            \"\\u001b[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\\n\",\n            \"\\u001b[K     |████████████████████████████████| 890kB 46.5MB/s \\n\",\n            \"\\u001b[?25hRequirement already satisfied: dataclasses in /usr/local/lib/python3.6/dist-packages (from transformers==2.9.1) (0.7)\\n\",\n            \"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from 
requests->transformers==2.9.1) (2020.4.5.1)\\n\",\n            \"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers==2.9.1) (1.24.3)\\n\",\n            \"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers==2.9.1) (3.0.4)\\n\",\n            \"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers==2.9.1) (2.9)\\n\",\n            \"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers==2.9.1) (1.12.0)\\n\",\n            \"Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers==2.9.1) (7.1.2)\\n\",\n            \"Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers==2.9.1) (0.14.1)\\n\",\n            \"Building wheels for collected packages: transformers, sacremoses\\n\",\n            \"  Building wheel for transformers (setup.py) ... \\u001b[?25l\\u001b[?25hdone\\n\",\n            \"  Created wheel for transformers: filename=transformers-2.9.1-cp36-none-any.whl size=637722 sha256=d72ddcc31cb33178d6e0190c642ac0d5fe63801c03375fcf7c85b379c12a2938\\n\",\n            \"  Stored in directory: /tmp/pip-ephem-wheel-cache-417x3jto/wheels/23/19/dd/2561a4e47240cf6b307729d58e56f8077dd0c698f5992216cf\\n\",\n            \"  Building wheel for sacremoses (setup.py) ... 
\\u001b[?25l\\u001b[?25hdone\\n\",\n            \"  Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893260 sha256=7d348f2205ae5ec3039f13604ba844a8aa8083f6b9a96382cb0b3ee487157340\\n\",\n            \"  Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\\n\",\n            \"Successfully built transformers sacremoses\\n\",\n            \"Installing collected packages: tokenizers, sentencepiece, sacremoses, transformers\\n\",\n            \"Successfully installed sacremoses-0.0.43 sentencepiece-0.1.90 tokenizers-0.7.0 transformers-2.9.1\\n\",\n            \"Collecting nlp\\n\",\n            \"\\u001b[?25l  Downloading https://files.pythonhosted.org/packages/21/17/0a408ac3403c71c978e7906146c69ed00b18712a4548e34d0ffb567c34cc/nlp-0.1.0-py3-none-any.whl (87kB)\\n\",\n            \"\\u001b[K     |████████████████████████████████| 92kB 2.9MB/s \\n\",\n            \"\\u001b[?25hRequirement already satisfied, skipping upgrade: numpy in /usr/local/lib/python3.6/dist-packages (from nlp) (1.18.4)\\n\",\n            \"Requirement already satisfied, skipping upgrade: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from nlp) (4.41.1)\\n\",\n            \"Collecting pyarrow>=0.16.0\\n\",\n            \"\\u001b[?25l  Downloading https://files.pythonhosted.org/packages/8c/d8/ff07b4cf88362ae4bebe5b7ffc357e2e6c773cdf2db8fec1bf9e973012dd/pyarrow-0.17.0-cp36-cp36m-manylinux2014_x86_64.whl (63.8MB)\\n\",\n            \"\\u001b[K     |████████████████████████████████| 63.8MB 69kB/s \\n\",\n            \"\\u001b[?25hRequirement already satisfied, skipping upgrade: dataclasses; python_version < \\\"3.7\\\" in /usr/local/lib/python3.6/dist-packages (from nlp) (0.7)\\n\",\n            \"Requirement already satisfied, skipping upgrade: filelock in /usr/local/lib/python3.6/dist-packages (from nlp) (3.0.12)\\n\",\n            \"Requirement already satisfied, skipping upgrade: requests>=2.19.0 in 
/usr/local/lib/python3.6/dist-packages (from nlp) (2.23.0)\\n\",\n            \"Requirement already satisfied, skipping upgrade: dill in /usr/local/lib/python3.6/dist-packages (from nlp) (0.3.1.1)\\n\",\n            \"Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests>=2.19.0->nlp) (1.24.3)\\n\",\n            \"Requirement already satisfied, skipping upgrade: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.19.0->nlp) (2.9)\\n\",\n            \"Requirement already satisfied, skipping upgrade: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.19.0->nlp) (3.0.4)\\n\",\n            \"Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.19.0->nlp) (2020.4.5.1)\\n\",\n            \"Installing collected packages: pyarrow, nlp\\n\",\n            \"  Found existing installation: pyarrow 0.14.1\\n\",\n            \"    Uninstalling pyarrow-0.14.1:\\n\",\n            \"      Successfully uninstalled pyarrow-0.14.1\\n\",\n            \"Successfully installed nlp-0.1.0 pyarrow-0.17.0\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.colab-display-data+json\": {\n              \"pip_warning\": {\n                \"packages\": [\n                  \"pyarrow\"\n                ]\n              }\n            }\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"--2020-05-16 16:02:40--  https://raw.githubusercontent.com/huggingface/transformers/2d184cb553ee20943b03b253f44300e466357871/examples/xla_spawn.py\\n\",\n            \"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 
151.101.0.133, 151.101.64.133, 151.101.128.133, ...\\n\",\n            \"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\\n\",\n            \"HTTP request sent, awaiting response... 200 OK\\n\",\n            \"Length: 1913 (1.9K) [text/plain]\\n\",\n            \"Saving to: ‘xla_spawn.py’\\n\",\n            \"\\n\",\n            \"\\rxla_spawn.py          0%[                    ]       0  --.-KB/s               \\rxla_spawn.py        100%[===================>]   1.87K  --.-KB/s    in 0s      \\n\",\n            \"\\n\",\n            \"2020-05-16 16:02:41 (40.3 MB/s) - ‘xla_spawn.py’ saved [1913/1913]\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"zFWlfEJllAcw\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"## Load and process data\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"NVOz2QUtaKQb\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Let's load and process the dataset using the nlp library. 
We will process the examples in the following way to cast the QA task in a text-to-text setting\\n\",\n        \"\\n\",\n        \"**input**\\n\",\n        \"question: question_text  context: context \\n\",\n        \"\\n\",\n        \"**target**\\n\",\n        \"answer_text\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"CaRw0ke1e1sF\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"import torch\\n\",\n        \"import nlp\\n\",\n        \"from transformers import T5Tokenizer\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"NaGYDvKUe8VS\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"a8cba6aa-c83d-4efb-d5ee-9bc6831509af\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 66,\n          \"referenced_widgets\": [\n            \"44ca82b3bbc5432eadc4f6fa3b81f483\",\n            \"9a58f575a463454aba3a52742abf00bf\",\n            \"6217a8ed0c634a5bbcbb4070d61d2ce3\",\n            \"9542805d664d451aa33a5bd7cc36d614\",\n            \"f90e54ee9dee42a99f38fdf2186cea62\",\n            \"2085a86f64a04a1586fefa0fe1413406\",\n            \"8e0c79e36d8c4d1faab99cc565feeb17\",\n            \"413ae83fdf9d4573b2c2dc053db83aa2\"\n          ]\n        }\n      },\n      \"source\": [\n        \"tokenizer = T5Tokenizer.from_pretrained('t5-base')\"\n      ],\n      \"execution_count\": 2,\n      \"outputs\": [\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"44ca82b3bbc5432eadc4f6fa3b81f483\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', 
max=791656.0, style=ProgressStyle(descripti…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"-gJOEe0Ye0di\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"# process the examples in input and target text format and the eos token at the end \\n\",\n        \"def add_eos_to_examples(example):\\n\",\n        \"    example['input_text'] = 'question: %s  context: %s </s>' % (example['question'], example['context'])\\n\",\n        \"    example['target_text'] = '%s </s>' % example['answers']['text'][0]\\n\",\n        \"    return example\\n\",\n        \"\\n\",\n        \"# tokenize the examples\\n\",\n        \"def convert_to_features(example_batch):\\n\",\n        \"    input_encodings = tokenizer.batch_encode_plus(example_batch['input_text'], pad_to_max_length=True, max_length=512)\\n\",\n        \"    target_encodings = tokenizer.batch_encode_plus(example_batch['target_text'], pad_to_max_length=True, max_length=16)\\n\",\n        \"\\n\",\n        \"    encodings = {\\n\",\n        \"        'input_ids': input_encodings['input_ids'], \\n\",\n        \"        'attention_mask': input_encodings['attention_mask'],\\n\",\n        \"        'target_ids': target_encodings['input_ids'],\\n\",\n        \"        'target_attention_mask': target_encodings['attention_mask']\\n\",\n        \"    }\\n\",\n        \"\\n\",\n        \"    return encodings\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"2ZWE4addfSmi\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"aee10e16-998e-45cd-feaa-e05d1f42283b\",\n   
     \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 333,\n          \"referenced_widgets\": [\n            \"d7fa301d6f7d498bac466849f04321c5\",\n            \"0f662bc0fa4444f386ec8975b4289f04\",\n            \"a2b08cf2bb834808b02d20fbee2f00ab\",\n            \"5efd16badcb64aa3a708975504feade1\",\n            \"a763d584af9147bcbe5430f95bf16925\",\n            \"329f27a50bee452b8f5ee719929fb33b\",\n            \"0be03081b1324f79a99bdbe926c30e89\",\n            \"dca7ff44a73a4c9f872cb2596c3c729e\",\n            \"d530b763a0e9415f9b8b1f1787a66619\",\n            \"7b85b787b5d9490c994f9e406835813b\",\n            \"f86c72c0da864856b9ddb55568fe4a2b\",\n            \"a4e0f5b0b3cf4091a44bad74e5abb62e\",\n            \"3ecc3e6a31db4835987c22b81a85c23f\",\n            \"7c062b51acab4bb59eeb4269d5e6c2df\",\n            \"7fbf906e11d24bfdb90e3f0ce46250c1\",\n            \"43deb763aee24506a82dd795cd126f49\",\n            \"6af39625f78e4e5989739eff7e8849f6\",\n            \"d97ac4c36e7944fdb989cf7ee1851350\",\n            \"2539f0244f0e468a993609fd562ff1f5\",\n            \"17025b38dedb45a39a95ffe7b6117d87\",\n            \"b6dde636eb51433fb5be7af2cedc2774\",\n            \"ff877f980dd34d2d9f3a4d3efb50af0c\",\n            \"cde27b62215448be92d8c8fa2292438a\",\n            \"5b66084b700145d5903aa91bcd2920f7\",\n            \"741a693169b54851a4cb47369dd9bd1e\",\n            \"eb0f02133b9140be92288fd6a8415deb\",\n            \"03a75d6d64524d47a3df6457e1901cbd\",\n            \"b4a2213a0a0b421d895269733344cc9d\",\n            \"64e947e3d9e440cf9f3880cb2325e57a\",\n            \"108bbc9991614357aae4469fc19b4859\",\n            \"6792931cb55944deb260f07d6bf87931\",\n            \"3cb18f41264740c9933f159a9820ad77\",\n            \"004faf88da144ad2bd3e50b5b9766627\",\n            \"f02e5f34ac414081ab2882b2ab7ff64d\",\n            \"41c3aac64e424e1db59b723a11bb0fb0\",\n            \"44540a280fb141979bdeff7f93d0cde2\",\n            
\"f9c604fa9a8642eaa218824b8ab651fe\",\n            \"34e5dacca6fc4497abda0a2ad37996e0\",\n            \"8bd88e5bf56e45bbbc1d339b00eff57c\",\n            \"93b33f1c33de44b4a1e57fb4133343c9\",\n            \"3ec93bdea33b40ba82e24b4a0e605bc0\",\n            \"121c3099d6e04079a4f6bf5707e0c60d\",\n            \"4f17790e2f594811bbfe55c9d883cf41\",\n            \"602e1a740e8f41fb800dbb3af0ea04f9\",\n            \"c1628c0657a44cf0873f2d64c9fb549c\",\n            \"99f6bf35ae7f4e66b4d339fe93768c7c\",\n            \"d73cf402af074a5481bf900e6abe2c63\",\n            \"0b2c415901d8468fb3f9acb63dd2f4e3\"\n          ]\n        }\n      },\n      \"source\": [\n        \"# load train and validation split of squad\\n\",\n        \"train_dataset  = nlp.load_dataset('squad', split=nlp.Split.TRAIN)\\n\",\n        \"valid_dataset = nlp.load_dataset('squad', split=nlp.Split.VALIDATION)\\n\",\n        \"\\n\",\n        \"# map add_eos_to_examples function to the dataset example wise \\n\",\n        \"train_dataset = train_dataset.map(add_eos_to_examples)\\n\",\n        \"# map convert_to_features batch wise\\n\",\n        \"train_dataset = train_dataset.map(convert_to_features, batched=True)\\n\",\n        \"\\n\",\n        \"valid_dataset = valid_dataset.map(add_eos_to_examples, load_from_cache_file=False)\\n\",\n        \"valid_dataset = valid_dataset.map(convert_to_features, batched=True, load_from_cache_file=False)\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"# set the tensor type and the columns which the dataset should return\\n\",\n        \"columns = ['input_ids', 'target_ids', 'attention_mask', 'target_attention_mask']\\n\",\n        \"train_dataset.set_format(type='torch', columns=columns)\\n\",\n        \"valid_dataset.set_format(type='torch', columns=columns)\"\n      ],\n      \"execution_count\": 4,\n      \"outputs\": [\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n      
        \"model_id\": \"d7fa301d6f7d498bac466849f04321c5\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=4997.0, style=ProgressStyle(description…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"d530b763a0e9415f9b8b1f1787a66619\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2240.0, style=ProgressStyle(description…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\",\n            \"Downloading and preparing dataset squad/plain_text (download: 33.51 MiB, generated: 85.75 MiB, total: 119.27 MiB) to /root/.cache/huggingface/datasets/squad/plain_text/1.0.0...\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"6af39625f78e4e5989739eff7e8849f6\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=8116577.0, style=ProgressStyle(descript…\"\n            ]\n          },\n          
\"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"741a693169b54851a4cb47369dd9bd1e\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1054280.0, style=ProgressStyle(descript…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"004faf88da144ad2bd3e50b5b9766627\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\r\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"3ec93bdea33b40ba82e24b4a0e605bc0\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              
\"HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\rDataset squad downloaded and prepared to /root/.cache/huggingface/datasets/squad/plain_text/1.0.0. Subsequent calls will reuse this data.\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"87599it [00:04, 19002.06it/s]\\n\",\n            \"100%|██████████| 88/88 [00:50<00:00,  1.75it/s]\\n\",\n            \"10570it [00:00, 18815.95it/s]\\n\",\n            \"100%|██████████| 11/11 [00:06<00:00,  1.77it/s]\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"vXJ24xVmlMoN\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"a12ce45b-09bc-462f-dae4-f2278c9442c5\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"source\": [\n        \"len(train_dataset), len(valid_dataset)\"\n      ],\n      \"execution_count\": 5,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"(87599, 10570)\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 5\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"UYOvGLdVgoxt\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"# cache the dataset, so we can load it directly for training\\n\",\n        \"\\n\",\n        \"torch.save(train_dataset, 'train_data.pt')\\n\",\n        
\"torch.save(valid_dataset, 'valid_data.pt')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"NASBkGrtnbgj\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"For more details on how to use the nlp library check out this [notebook](https://colab.research.google.com/github/huggingface/nlp/blob/master/notebooks/Overview.ipynb).\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"tty8vuMBqI5L\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"## Write training script\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"djKKcgN1cvAX\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Using the `Trainer` is pretty straightforward. Here are the 4 basic steps which are needed to use the trainer.\\n\",\n        \"\\n\",\n        \"1. **Parse the arguments needed**. These are divided into 3 parts for clarity and separation (TrainingArguments, ModelArguments and DataTrainingArguments).\\n\",\n        \"\\n\",\n        \"  1. **TrainingArguments**: These are basically the training hyperparameters such as learning rate, batch size, weight decay, gradient accumulation steps etc. See all possible arguments [here](https://github.com/huggingface/transformers/blob/master/src/transformers/training_args.py). These are used by the Trainer.\\n\",\n        \"\\n\",\n        \"  2. **ModelArguments**: These are the arguments for the model that you want to use such as the model_name_or_path, tokenizer_name etc. You'll need these to load the model and tokenizer.\\n\",\n        \"\\n\",\n        \"  3. **DataTrainingArguments**: These are as the name suggests arguments needed for the dataset. Such as the directory name where your files are stored etc. 
You'll need these to load/process the dataset.\\n\",\n        \"\\n\",\n        \"  TrainingArguments are already defined in the `TrainingArguments` class, you'll need to define `ModelArguments` and `DataTrainingArguments` classes for your task.\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"2. Load train and eval datasets\\n\",\n        \"3. Initialize the `Trainer`\\n\",\n        \"\\n\",\n        \"    These are the minimum parameters which you'll need for initializing `Trainer`. For the full list check [here](https://github.com/huggingface/transformers/blob/master/src/transformers/trainer.py#L107)\\n\",\n        \"\\n\",\n        \"    ```\\n\",\n        \"      model: PreTrainedModel\\n\",\n        \"      args: TrainingArguments\\n\",\n        \"      train_dataset: Optional[Dataset]\\n\",\n        \"      eval_dataset: Optional[Dataset]\\n\",\n        \"    ```\\n\",\n        \"4. Start training with  `trainer.train`\\n\",\n        \"\\n\",\n        \"    Call `trainer.train` and let the magic begin!\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"There are lots of things which the trainer handles for you out of the box such as gradient_accumulation, fp16 training, setting up the optimizer and scheduler, logging with wandb etc. 
I didn't set up wandb for this experiment, but will explore it for sure in a future experiment.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"KdmKlMkfcLa0\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"import dataclasses\\n\",\n        \"import logging\\n\",\n        \"import os\\n\",\n        \"import sys\\n\",\n        \"from dataclasses import dataclass, field\\n\",\n        \"from typing import Dict, List, Optional\\n\",\n        \"\\n\",\n        \"import numpy as np\\n\",\n        \"import torch\\n\",\n        \"\\n\",\n        \"from transformers import T5ForConditionalGeneration, T5Tokenizer, EvalPrediction\\n\",\n        \"from transformers import (\\n\",\n        \"    HfArgumentParser,\\n\",\n        \"    DataCollator,\\n\",\n        \"    Trainer,\\n\",\n        \"    TrainingArguments,\\n\",\n        \"    set_seed,\\n\",\n        \")\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"logger = logging.getLogger(__name__)\\n\",\n        \"\\n\",\n        \"# prepares lm_labels from target_ids, returns examples with keys as expected by the forward method\\n\",\n        \"# this is necessary because the trainer directly passes this dict as arguments to the model\\n\",\n        \"# so make sure the keys match the parameter names of the forward method\\n\",\n        \"@dataclass\\n\",\n        \"class T2TDataCollator(DataCollator):\\n\",\n        \"    def collate_batch(self, batch: List) -> Dict[str, torch.Tensor]:\\n\",\n        \"        \\\"\\\"\\\"\\n\",\n        \"        Take a list of samples from a Dataset and collate them into a batch.\\n\",\n        \"        Returns:\\n\",\n        \"            A dictionary of tensors\\n\",\n        \"        \\\"\\\"\\\"\\n\",\n        \"        input_ids = torch.stack([example['input_ids'] for example in batch])\\n\",\n        \"        lm_labels = torch.stack([example['target_ids'] for example in 
batch])\\n\",\n        \"        lm_labels[lm_labels[:, :] == 0] = -100\\n\",\n        \"        attention_mask = torch.stack([example['attention_mask'] for example in batch])\\n\",\n        \"        decoder_attention_mask = torch.stack([example['target_attention_mask'] for example in batch])\\n\",\n        \"        \\n\",\n        \"\\n\",\n        \"        return {\\n\",\n        \"            'input_ids': input_ids, \\n\",\n        \"            'attention_mask': attention_mask,\\n\",\n        \"            'lm_labels': lm_labels, \\n\",\n        \"            'decoder_attention_mask': decoder_attention_mask\\n\",\n        \"        }\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"@dataclass\\n\",\n        \"class ModelArguments:\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"\\n\",\n        \"    model_name_or_path: str = field(\\n\",\n        \"        metadata={\\\"help\\\": \\\"Path to pretrained model or model identifier from huggingface.co/models\\\"}\\n\",\n        \"    )\\n\",\n        \"    tokenizer_name: Optional[str] = field(\\n\",\n        \"        default=None, metadata={\\\"help\\\": \\\"Pretrained tokenizer name or path if not the same as model_name\\\"}\\n\",\n        \"    )\\n\",\n        \"    cache_dir: Optional[str] = field(\\n\",\n        \"        default=None, metadata={\\\"help\\\": \\\"Where do you want to store the pretrained models downloaded from s3\\\"}\\n\",\n        \"    )\\n\",\n        \"\\n\",\n        \"@dataclass\\n\",\n        \"class DataTrainingArguments:\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"    Arguments pertaining to what data we are going to input our model for training and eval.\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"    train_file_path: Optional[str] = field(\\n\",\n        \"        default='train_data.pt',\\n\",\n        \"        
metadata={\\\"help\\\": \\\"Path for cached train dataset\\\"},\\n\",\n        \"    )\\n\",\n        \"    valid_file_path: Optional[str] = field(\\n\",\n        \"        default='valid_data.pt',\\n\",\n        \"        metadata={\\\"help\\\": \\\"Path for cached valid dataset\\\"},\\n\",\n        \"    )\\n\",\n        \"    max_len: Optional[int] = field(\\n\",\n        \"        default=512,\\n\",\n        \"        metadata={\\\"help\\\": \\\"Max input length for the source text\\\"},\\n\",\n        \"    )\\n\",\n        \"    target_max_len: Optional[int] = field(\\n\",\n        \"        default=32,\\n\",\n        \"        metadata={\\\"help\\\": \\\"Max input length for the target text\\\"},\\n\",\n        \"    )\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def main():\\n\",\n        \"    # See all possible arguments in src/transformers/training_args.py\\n\",\n        \"    # or by passing the --help flag to this script.\\n\",\n        \"    # We now keep distinct sets of args, for a cleaner separation of concerns.\\n\",\n        \"\\n\",\n        \"    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))\\n\",\n        \"\\n\",\n        \"    # we will load the arguments from a json file, \\n\",\n        \"    # make sure you save the arguments at ./args.json\\n\",\n        \"    model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath('args.json'))\\n\",\n        \"\\n\",\n        \"    if (\\n\",\n        \"        os.path.exists(training_args.output_dir)\\n\",\n        \"        and os.listdir(training_args.output_dir)\\n\",\n        \"        and training_args.do_train\\n\",\n        \"        and not training_args.overwrite_output_dir\\n\",\n        \"    ):\\n\",\n        \"        raise ValueError(\\n\",\n        \"            f\\\"Output directory ({training_args.output_dir}) already exists and is not empty. 
Use --overwrite_output_dir to overcome.\\\"\\n\",\n        \"        )\\n\",\n        \"\\n\",\n        \"    # Setup logging\\n\",\n        \"    logging.basicConfig(\\n\",\n        \"        format=\\\"%(asctime)s - %(levelname)s - %(name)s -   %(message)s\\\",\\n\",\n        \"        datefmt=\\\"%m/%d/%Y %H:%M:%S\\\",\\n\",\n        \"        level=logging.INFO if training_args.local_rank in [-1, 0] else logging.WARN,\\n\",\n        \"    )\\n\",\n        \"    logger.warning(\\n\",\n        \"        \\\"Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s\\\",\\n\",\n        \"        training_args.local_rank,\\n\",\n        \"        training_args.device,\\n\",\n        \"        training_args.n_gpu,\\n\",\n        \"        bool(training_args.local_rank != -1),\\n\",\n        \"        training_args.fp16,\\n\",\n        \"    )\\n\",\n        \"    logger.info(\\\"Training/evaluation parameters %s\\\", training_args)\\n\",\n        \"\\n\",\n        \"    # Set seed\\n\",\n        \"    set_seed(training_args.seed)\\n\",\n        \"\\n\",\n        \"    # Load pretrained model and tokenizer\\n\",\n        \"    #\\n\",\n        \"    # Distributed training:\\n\",\n        \"    # The .from_pretrained methods guarantee that only one local process can concurrently\\n\",\n        \"    # download model & vocab.\\n\",\n        \"\\n\",\n        \"    tokenizer = T5Tokenizer.from_pretrained(\\n\",\n        \"        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,\\n\",\n        \"        cache_dir=model_args.cache_dir,\\n\",\n        \"    )\\n\",\n        \"    model = T5ForConditionalGeneration.from_pretrained(\\n\",\n        \"        model_args.model_name_or_path,\\n\",\n        \"        cache_dir=model_args.cache_dir,\\n\",\n        \"    )\\n\",\n        \"\\n\",\n        \"    # Get datasets\\n\",\n        \"    print('loading data')\\n\",\n        \"    train_dataset  = 
torch.load(data_args.train_file_path)\\n\",\n        \"    valid_dataset = torch.load(data_args.valid_file_path)\\n\",\n        \"    print('loading done')\\n\",\n        \"\\n\",\n        \"    # Initialize our Trainer\\n\",\n        \"    trainer = Trainer(\\n\",\n        \"        model=model,\\n\",\n        \"        args=training_args,\\n\",\n        \"        train_dataset=train_dataset,\\n\",\n        \"        eval_dataset=valid_dataset,\\n\",\n        \"        data_collator=T2TDataCollator(),\\n\",\n        \"        prediction_loss_only=True\\n\",\n        \"    )\\n\",\n        \"\\n\",\n        \"    # Training\\n\",\n        \"    if training_args.do_train:\\n\",\n        \"        trainer.train(\\n\",\n        \"            model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None\\n\",\n        \"        )\\n\",\n        \"        trainer.save_model()\\n\",\n        \"        # For convenience, we also re-save the tokenizer to the same directory,\\n\",\n        \"        # so that you can share your model easily on huggingface.co/models =)\\n\",\n        \"        if trainer.is_world_master():\\n\",\n        \"            tokenizer.save_pretrained(training_args.output_dir)\\n\",\n        \"\\n\",\n        \"    # Evaluation\\n\",\n        \"    results = {}\\n\",\n        \"    if training_args.do_eval and training_args.local_rank in [-1, 0]:\\n\",\n        \"        logger.info(\\\"*** Evaluate ***\\\")\\n\",\n        \"\\n\",\n        \"        eval_output = trainer.evaluate()\\n\",\n        \"\\n\",\n        \"        output_eval_file = os.path.join(training_args.output_dir, \\\"eval_results.txt\\\")\\n\",\n        \"        with open(output_eval_file, \\\"w\\\") as writer:\\n\",\n        \"            logger.info(\\\"***** Eval results *****\\\")\\n\",\n        \"            for key in sorted(eval_output.keys()):\\n\",\n        \"                logger.info(\\\"  %s = %s\\\", key, 
str(eval_output[key]))\\n\",\n        \"                writer.write(\\\"%s = %s\\\\n\\\" % (key, str(eval_output[key])))\\n\",\n        \"    \\n\",\n        \"        results.update(eval_output)\\n\",\n        \"    \\n\",\n        \"    return results\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def _mp_fn(index):\\n\",\n        \"    # For xla_spawn (TPUs)\\n\",\n        \"    main()\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"15duw24hqMBy\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"## Train\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"n1I6IhBM1KV2\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"import json\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"zOvs9RUllLTw\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Let's write the arguments in a dict and store in a json file. 
The above code will load this file and parse the arguments.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"2ObtXlBVuJqv\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"args_dict = {\\n\",\n        \"  \\\"num_cores\\\": 8,\\n\",\n        \"  'training_script': 'train_t5_squad.py',\\n\",\n        \"  \\\"model_name_or_path\\\": 't5-base',\\n\",\n        \"  \\\"max_len\\\": 512 ,\\n\",\n        \"  \\\"target_max_len\\\": 16,\\n\",\n        \"  \\\"output_dir\\\": './models/tpu',\\n\",\n        \"  \\\"overwrite_output_dir\\\": True,\\n\",\n        \"  \\\"per_gpu_train_batch_size\\\": 8,\\n\",\n        \"  \\\"per_gpu_eval_batch_size\\\": 8,\\n\",\n        \"  \\\"gradient_accumulation_steps\\\": 4,\\n\",\n        \"  \\\"learning_rate\\\": 1e-4,\\n\",\n        \"  \\\"tpu_num_cores\\\": 8,\\n\",\n        \"  \\\"num_train_epochs\\\": 4,\\n\",\n        \"  \\\"do_train\\\": True\\n\",\n        \"}\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"xU5MI8ju1L3w\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"with open('args.json', 'w') as f:\\n\",\n        \"  json.dump(args_dict, f)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"AsQB1Kpjlltp\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Start training!\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"UnGuDVPYuyo4\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"import torch_xla.distributed.xla_multiprocessing as xmp\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      
\"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"X9_Go99fvW-z\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"784c2b84-ecb2-4a2a-871b-dc63f63ccc74\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 1000,\n          \"referenced_widgets\": [\n            \"1d197b0946534a91b2eb4595baade174\",\n            \"55c18e9169ab44c981de4cb2ebaea578\",\n            \"21065bd4f9fa4d97a20a25c9a35b7737\",\n            \"88d28cee2f02428aa3c4c22a873d6385\",\n            \"12d2d5b9a3f54b25aa8fef6439625db6\",\n            \"32f70451efa6473a8a00807dd22a3248\",\n            \"17c358e37fc346a9ac8ce475599e495a\",\n            \"947afd4c522443b38bc223114178c7ee\",\n            \"a558cfceca8b43568dfad2267a534a35\",\n            \"7a5270ddb66447af9fba298e830fe8a4\",\n            \"d961f9fa40a04ceda3e1962eef4d33cc\",\n            \"d9363dcd670643e6ad2d0b65c4238b98\",\n            \"b25b3d9394be4ac786678181c800e926\",\n            \"3fed14bdec8b4162887d77dea7a9f289\",\n            \"5051978cf84f4f8191c1e3ad4661776e\",\n            \"f67b49a0d48f4714a124bc972e9afa0f\",\n            \"2c4d7bdf7f6c48f0825ef87a12399f39\",\n            \"391fc0937c3b48f8b4c3b33a1c3849a0\",\n            \"11c6ff7dfb19436c920c5114734c0ec9\",\n            \"3b3722a55e3d43bc93c29a505895a25c\",\n            \"f46743ac2da84d6ca77180d8686ead6b\",\n            \"1b0e2b414d6843ae9c918db417ffff4d\",\n            \"e7559a6907f7479db7764f80c4501993\",\n            \"5f89a141b34d478e861ea6bd4a17dcd2\",\n            \"a9a7aeae6fa74b67a349a69e845f8898\",\n            \"0a9d6c739f984b91a0e17724707eb7e1\",\n            \"1ed37d3716374294b072524937625965\",\n            \"4b44de48aeeb4b8dbc9388fc7f7da227\",\n            \"051052ef94d447a4b0233225d8f5ae81\",\n            \"3e43d0668e6f461c9cc461aa735a4437\",\n            \"43e265f386ca4dfd88e226a87f32775c\",\n            \"6568a6e42f734c29ad5c26f2397a5a69\",\n            
\"c227333162114432a38b1480ec837701\",\n            \"71a3e5b846a74f409dfd029ea3bf5d61\",\n            \"8a27b38cd17649b1bc720a33050dc520\",\n            \"16497f2b4de443bb9bb14ccaf0c94803\",\n            \"f93601a0c6d84d8b9324fa2380e14d25\",\n            \"f30f39adee694c55bcbaa850cf031b69\",\n            \"ea4e4964058945f287eb2a6271b5bae7\",\n            \"1e74937c1f304d15ad49230b2428590d\"\n          ]\n        }\n      },\n      \"source\": [\n        \"xmp.spawn(_mp_fn, args=(), nprocs=8, start_method='fork')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:27 - INFO - transformers.training_args -   PyTorch: setting up devices\\n\",\n            \"05/16/2020 09:42:27 - WARNING - __main__ -   Process rank: -1, device: xla:1, n_gpu: 0, distributed training: False, 16-bits training: False\\n\",\n            \"05/16/2020 09:42:27 - INFO - __main__ -   Training/evaluation parameters TrainingArguments(output_dir='./models/tpu', overwrite_output_dir=True, do_train=True, do_eval=False, do_predict=False, evaluate_during_training=False, per_gpu_train_batch_size=8, per_gpu_eval_batch_size=8, gradient_accumulation_steps=4, learning_rate=0.0001, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=4, max_steps=-1, warmup_steps=0, logging_dir=None, logging_first_step=False, logging_steps=500, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level='O1', local_rank=-1, tpu_num_cores=8, tpu_metrics_debug=False)\\n\",\n            \"05/16/2020 09:42:27 - INFO - transformers.tokenization_utils -   loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"05/16/2020 09:42:27 - INFO - 
transformers.configuration_utils -   loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"05/16/2020 09:42:27 - INFO - transformers.configuration_utils -   Model config T5Config {\\n\",\n            \"  \\\"architectures\\\": [\\n\",\n            \"    \\\"T5WithLMHeadModel\\\"\\n\",\n            \"  ],\\n\",\n            \"  \\\"d_ff\\\": 3072,\\n\",\n            \"  \\\"d_kv\\\": 64,\\n\",\n            \"  \\\"d_model\\\": 768,\\n\",\n            \"  \\\"decoder_start_token_id\\\": 0,\\n\",\n            \"  \\\"dropout_rate\\\": 0.1,\\n\",\n            \"  \\\"eos_token_id\\\": 1,\\n\",\n            \"  \\\"initializer_factor\\\": 1.0,\\n\",\n            \"  \\\"is_encoder_decoder\\\": true,\\n\",\n            \"  \\\"layer_norm_epsilon\\\": 1e-06,\\n\",\n            \"  \\\"model_type\\\": \\\"t5\\\",\\n\",\n            \"  \\\"n_positions\\\": 512,\\n\",\n            \"  \\\"num_heads\\\": 12,\\n\",\n            \"  \\\"num_layers\\\": 12,\\n\",\n            \"  \\\"output_past\\\": true,\\n\",\n            \"  \\\"pad_token_id\\\": 0,\\n\",\n            \"  \\\"relative_attention_num_buckets\\\": 32,\\n\",\n            \"  \\\"task_specific_params\\\": {\\n\",\n            \"    \\\"summarization\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"length_penalty\\\": 2.0,\\n\",\n            \"      \\\"max_length\\\": 200,\\n\",\n            \"      \\\"min_length\\\": 30,\\n\",\n            \"      \\\"no_repeat_ngram_size\\\": 3,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"summarize: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_de\\\": {\\n\",\n            \"      \\\"early_stopping\\\": 
true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to German: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_fr\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to French: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_ro\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to Romanian: \\\"\\n\",\n            \"    }\\n\",\n            \"  },\\n\",\n            \"  \\\"vocab_size\\\": 32128\\n\",\n            \"}\\n\",\n            \"\\n\",\n            \"05/16/2020 09:42:27 - INFO - transformers.modeling_utils -   loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.training_args -   PyTorch: setting up devices\\n\",\n            \"05/16/2020 09:42:29 - WARNING - __main__ -   Process rank: -1, device: xla:0, n_gpu: 0, distributed training: False, 16-bits training: False\\n\",\n            \"05/16/2020 09:42:29 - INFO - __main__ -   Training/evaluation parameters TrainingArguments(output_dir='./models/tpu', overwrite_output_dir=True, do_train=True, do_eval=False, do_predict=False, evaluate_during_training=False, per_gpu_train_batch_size=8, per_gpu_eval_batch_size=8, gradient_accumulation_steps=4, learning_rate=0.0001, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, 
num_train_epochs=4, max_steps=-1, warmup_steps=0, logging_dir=None, logging_first_step=False, logging_steps=500, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level='O1', local_rank=-1, tpu_num_cores=8, tpu_metrics_debug=False)\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.training_args -   PyTorch: setting up devices\\n\",\n            \"05/16/2020 09:42:29 - WARNING - __main__ -   Process rank: -1, device: xla:0, n_gpu: 0, distributed training: False, 16-bits training: False\\n\",\n            \"05/16/2020 09:42:29 - INFO - __main__ -   Training/evaluation parameters TrainingArguments(output_dir='./models/tpu', overwrite_output_dir=True, do_train=True, do_eval=False, do_predict=False, evaluate_during_training=False, per_gpu_train_batch_size=8, per_gpu_eval_batch_size=8, gradient_accumulation_steps=4, learning_rate=0.0001, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=4, max_steps=-1, warmup_steps=0, logging_dir=None, logging_first_step=False, logging_steps=500, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level='O1', local_rank=-1, tpu_num_cores=8, tpu_metrics_debug=False)\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.tokenization_utils -   loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.tokenization_utils -   loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.configuration_utils -   loading 
configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.configuration_utils -   Model config T5Config {\\n\",\n            \"  \\\"architectures\\\": [\\n\",\n            \"    \\\"T5WithLMHeadModel\\\"\\n\",\n            \"  ],\\n\",\n            \"  \\\"d_ff\\\": 3072,\\n\",\n            \"  \\\"d_kv\\\": 64,\\n\",\n            \"  \\\"d_model\\\": 768,\\n\",\n            \"  \\\"decoder_start_token_id\\\": 0,\\n\",\n            \"  \\\"dropout_rate\\\": 0.1,\\n\",\n            \"  \\\"eos_token_id\\\": 1,\\n\",\n            \"  \\\"initializer_factor\\\": 1.0,\\n\",\n            \"  \\\"is_encoder_decoder\\\": true,\\n\",\n            \"  \\\"layer_norm_epsilon\\\": 1e-06,\\n\",\n            \"  \\\"model_type\\\": \\\"t5\\\",\\n\",\n            \"  \\\"n_positions\\\": 512,\\n\",\n            \"  \\\"num_heads\\\": 12,\\n\",\n            \"  \\\"num_layers\\\": 12,\\n\",\n            \"  \\\"output_past\\\": true,\\n\",\n            \"  \\\"pad_token_id\\\": 0,\\n\",\n            \"  \\\"relative_attention_num_buckets\\\": 32,\\n\",\n            \"  \\\"task_specific_params\\\": {\\n\",\n            \"    \\\"summarization\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"length_penalty\\\": 2.0,\\n\",\n            \"      \\\"max_length\\\": 200,\\n\",\n            \"      \\\"min_length\\\": 30,\\n\",\n            \"      \\\"no_repeat_ngram_size\\\": 3,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"summarize: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_de\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      
\\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to German: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_fr\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to French: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_ro\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to Romanian: \\\"\\n\",\n            \"    }\\n\",\n            \"  },\\n\",\n            \"  \\\"vocab_size\\\": 32128\\n\",\n            \"}\\n\",\n            \"\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.training_args -   PyTorch: setting up devices\\n\",\n            \"05/16/2020 09:42:29 - WARNING - __main__ -   Process rank: -1, device: xla:0, n_gpu: 0, distributed training: False, 16-bits training: False\\n\",\n            \"05/16/2020 09:42:29 - INFO - __main__ -   Training/evaluation parameters TrainingArguments(output_dir='./models/tpu', overwrite_output_dir=True, do_train=True, do_eval=False, do_predict=False, evaluate_during_training=False, per_gpu_train_batch_size=8, per_gpu_eval_batch_size=8, gradient_accumulation_steps=4, learning_rate=0.0001, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=4, max_steps=-1, warmup_steps=0, logging_dir=None, logging_first_step=False, logging_steps=500, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level='O1', local_rank=-1, tpu_num_cores=8, tpu_metrics_debug=False)\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.configuration_utils -   loading 
configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.configuration_utils -   Model config T5Config {\\n\",\n            \"  \\\"architectures\\\": [\\n\",\n            \"    \\\"T5WithLMHeadModel\\\"\\n\",\n            \"  ],\\n\",\n            \"  \\\"d_ff\\\": 3072,\\n\",\n            \"  \\\"d_kv\\\": 64,\\n\",\n            \"  \\\"d_model\\\": 768,\\n\",\n            \"  \\\"decoder_start_token_id\\\": 0,\\n\",\n            \"  \\\"dropout_rate\\\": 0.1,\\n\",\n            \"  \\\"eos_token_id\\\": 1,\\n\",\n            \"  \\\"initializer_factor\\\": 1.0,\\n\",\n            \"  \\\"is_encoder_decoder\\\": true,\\n\",\n            \"  \\\"layer_norm_epsilon\\\": 1e-06,\\n\",\n            \"  \\\"model_type\\\": \\\"t5\\\",\\n\",\n            \"  \\\"n_positions\\\": 512,\\n\",\n            \"  \\\"num_heads\\\": 12,\\n\",\n            \"  \\\"num_layers\\\": 12,\\n\",\n            \"  \\\"output_past\\\": true,\\n\",\n            \"  \\\"pad_token_id\\\": 0,\\n\",\n            \"  \\\"relative_attention_num_buckets\\\": 32,\\n\",\n            \"  \\\"task_specific_params\\\": {\\n\",\n            \"    \\\"summarization\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"length_penalty\\\": 2.0,\\n\",\n            \"      \\\"max_length\\\": 200,\\n\",\n            \"      \\\"min_length\\\": 30,\\n\",\n            \"      \\\"no_repeat_ngram_size\\\": 3,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"summarize: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_de\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      
\\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to German: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_fr\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to French: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_ro\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to Romanian: \\\"\\n\",\n            \"    }\\n\",\n            \"  },\\n\",\n            \"  \\\"vocab_size\\\": 32128\\n\",\n            \"}\\n\",\n            \"\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.training_args -   PyTorch: setting up devices\\n\",\n            \"05/16/2020 09:42:29 - WARNING - __main__ -   Process rank: -1, device: xla:0, n_gpu: 0, distributed training: False, 16-bits training: False\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.tokenization_utils -   loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"05/16/2020 09:42:29 - INFO - __main__ -   Training/evaluation parameters TrainingArguments(output_dir='./models/tpu', overwrite_output_dir=True, do_train=True, do_eval=False, do_predict=False, evaluate_during_training=False, per_gpu_train_batch_size=8, per_gpu_eval_batch_size=8, gradient_accumulation_steps=4, learning_rate=0.0001, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=4, 
max_steps=-1, warmup_steps=0, logging_dir=None, logging_first_step=False, logging_steps=500, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level='O1', local_rank=-1, tpu_num_cores=8, tpu_metrics_debug=False)\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.modeling_utils -   loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.modeling_utils -   loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"05/16/2020 09:42:29 - INFO - transformers.tokenization_utils -   loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.configuration_utils -   loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.configuration_utils -   Model config T5Config {\\n\",\n            \"  \\\"architectures\\\": [\\n\",\n            \"    \\\"T5WithLMHeadModel\\\"\\n\",\n            \"  ],\\n\",\n            \"  \\\"d_ff\\\": 3072,\\n\",\n            \"  \\\"d_kv\\\": 64,\\n\",\n            \"  \\\"d_model\\\": 768,\\n\",\n            
\"  \\\"decoder_start_token_id\\\": 0,\\n\",\n            \"  \\\"dropout_rate\\\": 0.1,\\n\",\n            \"  \\\"eos_token_id\\\": 1,\\n\",\n            \"  \\\"initializer_factor\\\": 1.0,\\n\",\n            \"  \\\"is_encoder_decoder\\\": true,\\n\",\n            \"  \\\"layer_norm_epsilon\\\": 1e-06,\\n\",\n            \"  \\\"model_type\\\": \\\"t5\\\",\\n\",\n            \"  \\\"n_positions\\\": 512,\\n\",\n            \"  \\\"num_heads\\\": 12,\\n\",\n            \"  \\\"num_layers\\\": 12,\\n\",\n            \"  \\\"output_past\\\": true,\\n\",\n            \"  \\\"pad_token_id\\\": 0,\\n\",\n            \"  \\\"relative_attention_num_buckets\\\": 32,\\n\",\n            \"  \\\"task_specific_params\\\": {\\n\",\n            \"    \\\"summarization\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"length_penalty\\\": 2.0,\\n\",\n            \"      \\\"max_length\\\": 200,\\n\",\n            \"      \\\"min_length\\\": 30,\\n\",\n            \"      \\\"no_repeat_ngram_size\\\": 3,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"summarize: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_de\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to German: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_fr\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to French: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_ro\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 
300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to Romanian: \\\"\\n\",\n            \"    }\\n\",\n            \"  },\\n\",\n            \"  \\\"vocab_size\\\": 32128\\n\",\n            \"}\\n\",\n            \"\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.training_args -   PyTorch: setting up devices\\n\",\n            \"05/16/2020 09:42:30 - WARNING - __main__ -   Process rank: -1, device: xla:0, n_gpu: 0, distributed training: False, 16-bits training: False\\n\",\n            \"05/16/2020 09:42:30 - INFO - __main__ -   Training/evaluation parameters TrainingArguments(output_dir='./models/tpu', overwrite_output_dir=True, do_train=True, do_eval=False, do_predict=False, evaluate_during_training=False, per_gpu_train_batch_size=8, per_gpu_eval_batch_size=8, gradient_accumulation_steps=4, learning_rate=0.0001, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=4, max_steps=-1, warmup_steps=0, logging_dir=None, logging_first_step=False, logging_steps=500, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level='O1', local_rank=-1, tpu_num_cores=8, tpu_metrics_debug=False)\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.training_args -   PyTorch: setting up devices\\n\",\n            \"05/16/2020 09:42:30 - WARNING - __main__ -   Process rank: -1, device: xla:0, n_gpu: 0, distributed training: False, 16-bits training: False\\n\",\n            \"05/16/2020 09:42:30 - INFO - __main__ -   Training/evaluation parameters TrainingArguments(output_dir='./models/tpu', overwrite_output_dir=True, do_train=True, do_eval=False, do_predict=False, evaluate_during_training=False, per_gpu_train_batch_size=8, per_gpu_eval_batch_size=8, gradient_accumulation_steps=4, learning_rate=0.0001, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=4, max_steps=-1, warmup_steps=0, logging_dir=None, 
logging_first_step=False, logging_steps=500, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level='O1', local_rank=-1, tpu_num_cores=8, tpu_metrics_debug=False)\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.configuration_utils -   loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.configuration_utils -   Model config T5Config {\\n\",\n            \"  \\\"architectures\\\": [\\n\",\n            \"    \\\"T5WithLMHeadModel\\\"\\n\",\n            \"  ],\\n\",\n            \"  \\\"d_ff\\\": 3072,\\n\",\n            \"  \\\"d_kv\\\": 64,\\n\",\n            \"  \\\"d_model\\\": 768,\\n\",\n            \"  \\\"decoder_start_token_id\\\": 0,\\n\",\n            \"  \\\"dropout_rate\\\": 0.1,\\n\",\n            \"  \\\"eos_token_id\\\": 1,\\n\",\n            \"  \\\"initializer_factor\\\": 1.0,\\n\",\n            \"  \\\"is_encoder_decoder\\\": true,\\n\",\n            \"  \\\"layer_norm_epsilon\\\": 1e-06,\\n\",\n            \"  \\\"model_type\\\": \\\"t5\\\",\\n\",\n            \"  \\\"n_positions\\\": 512,\\n\",\n            \"  \\\"num_heads\\\": 12,\\n\",\n            \"  \\\"num_layers\\\": 12,\\n\",\n            \"  \\\"output_past\\\": true,\\n\",\n            \"  \\\"pad_token_id\\\": 0,\\n\",\n            \"  \\\"relative_attention_num_buckets\\\": 32,\\n\",\n            \"  \\\"task_specific_params\\\": {\\n\",\n            \"    \\\"summarization\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"length_penalty\\\": 2.0,\\n\",\n            \"      \\\"max_length\\\": 200,\\n\",\n            \"      \\\"min_length\\\": 30,\\n\",\n            \"      \\\"no_repeat_ngram_size\\\": 
3,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"summarize: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_de\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to German: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_fr\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to French: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_ro\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to Romanian: \\\"\\n\",\n            \"    }\\n\",\n            \"  },\\n\",\n            \"  \\\"vocab_size\\\": 32128\\n\",\n            \"}\\n\",\n            \"\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.modeling_utils -   loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.modeling_utils -   loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.training_args -   PyTorch: setting 
up devices\\n\",\n            \"05/16/2020 09:42:30 - WARNING - __main__ -   Process rank: -1, device: xla:0, n_gpu: 0, distributed training: False, 16-bits training: False\\n\",\n            \"05/16/2020 09:42:30 - INFO - __main__ -   Training/evaluation parameters TrainingArguments(output_dir='./models/tpu', overwrite_output_dir=True, do_train=True, do_eval=False, do_predict=False, evaluate_during_training=False, per_gpu_train_batch_size=8, per_gpu_eval_batch_size=8, gradient_accumulation_steps=4, learning_rate=0.0001, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=4, max_steps=-1, warmup_steps=0, logging_dir=None, logging_first_step=False, logging_steps=500, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level='O1', local_rank=-1, tpu_num_cores=8, tpu_metrics_debug=False)\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.tokenization_utils -   loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.tokenization_utils -   loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.tokenization_utils -   loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.configuration_utils -   loading configuration file 
https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.configuration_utils -   Model config T5Config {\\n\",\n            \"  \\\"architectures\\\": [\\n\",\n            \"    \\\"T5WithLMHeadModel\\\"\\n\",\n            \"  ],\\n\",\n            \"  \\\"d_ff\\\": 3072,\\n\",\n            \"  \\\"d_kv\\\": 64,\\n\",\n            \"  \\\"d_model\\\": 768,\\n\",\n            \"  \\\"decoder_start_token_id\\\": 0,\\n\",\n            \"  \\\"dropout_rate\\\": 0.1,\\n\",\n            \"  \\\"eos_token_id\\\": 1,\\n\",\n            \"  \\\"initializer_factor\\\": 1.0,\\n\",\n            \"  \\\"is_encoder_decoder\\\": true,\\n\",\n            \"  \\\"layer_norm_epsilon\\\": 1e-06,\\n\",\n            \"  \\\"model_type\\\": \\\"t5\\\",\\n\",\n            \"  \\\"n_positions\\\": 512,\\n\",\n            \"  \\\"num_heads\\\": 12,\\n\",\n            \"  \\\"num_layers\\\": 12,\\n\",\n            \"  \\\"output_past\\\": true,\\n\",\n            \"  \\\"pad_token_id\\\": 0,\\n\",\n            \"  \\\"relative_attention_num_buckets\\\": 32,\\n\",\n            \"  \\\"task_specific_params\\\": {\\n\",\n            \"    \\\"summarization\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"length_penalty\\\": 2.0,\\n\",\n            \"      \\\"max_length\\\": 200,\\n\",\n            \"      \\\"min_length\\\": 30,\\n\",\n            \"      \\\"no_repeat_ngram_size\\\": 3,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"summarize: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_de\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n    
        \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to German: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_fr\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to French: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_ro\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to Romanian: \\\"\\n\",\n            \"    }\\n\",\n            \"  },\\n\",\n            \"  \\\"vocab_size\\\": 32128\\n\",\n            \"}\\n\",\n            \"\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.configuration_utils -   loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.configuration_utils -   Model config T5Config {\\n\",\n            \"  \\\"architectures\\\": [\\n\",\n            \"    \\\"T5WithLMHeadModel\\\"\\n\",\n            \"  ],\\n\",\n            \"  \\\"d_ff\\\": 3072,\\n\",\n            \"  \\\"d_kv\\\": 64,\\n\",\n            \"  \\\"d_model\\\": 768,\\n\",\n            \"  \\\"decoder_start_token_id\\\": 0,\\n\",\n            \"  \\\"dropout_rate\\\": 0.1,\\n\",\n            \"  \\\"eos_token_id\\\": 1,\\n\",\n            \"  \\\"initializer_factor\\\": 1.0,\\n\",\n            \"  \\\"is_encoder_decoder\\\": true,\\n\",\n            \"  \\\"layer_norm_epsilon\\\": 1e-06,\\n\",\n            \"  
\\\"model_type\\\": \\\"t5\\\",\\n\",\n            \"  \\\"n_positions\\\": 512,\\n\",\n            \"  \\\"num_heads\\\": 12,\\n\",\n            \"  \\\"num_layers\\\": 12,\\n\",\n            \"  \\\"output_past\\\": true,\\n\",\n            \"  \\\"pad_token_id\\\": 0,\\n\",\n            \"  \\\"relative_attention_num_buckets\\\": 32,\\n\",\n            \"  \\\"task_specific_params\\\": {\\n\",\n            \"    \\\"summarization\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"length_penalty\\\": 2.0,\\n\",\n            \"      \\\"max_length\\\": 200,\\n\",\n            \"      \\\"min_length\\\": 30,\\n\",\n            \"      \\\"no_repeat_ngram_size\\\": 3,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"summarize: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_de\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to German: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_fr\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to French: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_ro\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to Romanian: \\\"\\n\",\n            \"    }\\n\",\n            \"  },\\n\",\n            \"  \\\"vocab_size\\\": 32128\\n\",\n            \"}\\n\",\n            \"05/16/2020 09:42:30 - INFO - 
transformers.modeling_utils -   loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.modeling_utils -   loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.configuration_utils -   loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.configuration_utils -   Model config T5Config {\\n\",\n            \"  \\\"architectures\\\": [\\n\",\n            \"    \\\"T5WithLMHeadModel\\\"\\n\",\n            \"  ],\\n\",\n            \"  \\\"d_ff\\\": 3072,\\n\",\n            \"  \\\"d_kv\\\": 64,\\n\",\n            \"  \\\"d_model\\\": 768,\\n\",\n            \"  \\\"decoder_start_token_id\\\": 0,\\n\",\n            \"  \\\"dropout_rate\\\": 0.1,\\n\",\n            \"  \\\"eos_token_id\\\": 1,\\n\",\n            \"  \\\"initializer_factor\\\": 1.0,\\n\",\n            \"  \\\"is_encoder_decoder\\\": true,\\n\",\n            \"  \\\"layer_norm_epsilon\\\": 1e-06,\\n\",\n            \"  \\\"model_type\\\": \\\"t5\\\",\\n\",\n            \"  \\\"n_positions\\\": 512,\\n\",\n            \"  \\\"num_heads\\\": 12,\\n\",\n            \"  \\\"num_layers\\\": 12,\\n\",\n            \"  \\\"output_past\\\": true,\\n\",\n            \"  \\\"pad_token_id\\\": 0,\\n\",\n        
    \"  \\\"relative_attention_num_buckets\\\": 32,\\n\",\n            \"  \\\"task_specific_params\\\": {\\n\",\n            \"    \\\"summarization\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"length_penalty\\\": 2.0,\\n\",\n            \"      \\\"max_length\\\": 200,\\n\",\n            \"      \\\"min_length\\\": 30,\\n\",\n            \"      \\\"no_repeat_ngram_size\\\": 3,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"summarize: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_de\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to German: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_fr\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to French: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_ro\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to Romanian: \\\"\\n\",\n            \"    }\\n\",\n            \"  },\\n\",\n            \"  \\\"vocab_size\\\": 32128\\n\",\n            \"}\\n\",\n            \"\\n\",\n            \"05/16/2020 09:42:30 - INFO - transformers.modeling_utils -   loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at 
/root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"05/16/2020 09:42:34 - INFO - transformers.modeling_utils -   Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading data\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:34 - INFO - nlp.utils.file_utils -   PyTorch version 1.6.0a0+bf2bbd9 available.\\n\",\n            \"05/16/2020 09:42:34 - INFO - nlp.utils.file_utils -   TensorFlow version 2.2.0 available.\\n\",\n            \"05/16/2020 09:42:37 - INFO - transformers.modeling_utils -   Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading data\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:37 - INFO - nlp.utils.file_utils -   PyTorch version 1.6.0a0+bf2bbd9 available.\\n\",\n            \"05/16/2020 09:42:37 - INFO - nlp.utils.file_utils -   TensorFlow version 2.2.0 available.\\n\",\n            \"05/16/2020 09:42:37 - INFO - transformers.modeling_utils -   Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n    
    {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading data\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:37 - INFO - nlp.utils.file_utils -   PyTorch version 1.6.0a0+bf2bbd9 available.\\n\",\n            \"05/16/2020 09:42:37 - INFO - nlp.utils.file_utils -   TensorFlow version 2.2.0 available.\\n\",\n            \"05/16/2020 09:42:37 - INFO - transformers.modeling_utils -   Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading data\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:37 - INFO - nlp.utils.file_utils -   PyTorch version 1.6.0a0+bf2bbd9 available.\\n\",\n            \"05/16/2020 09:42:37 - INFO - nlp.utils.file_utils -   TensorFlow version 2.2.0 available.\\n\",\n            \"05/16/2020 09:42:37 - INFO - transformers.modeling_utils -   Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading data\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:37 - INFO - nlp.utils.file_utils -   PyTorch version 1.6.0a0+bf2bbd9 available.\\n\",\n            \"05/16/2020 09:42:37 - INFO - nlp.utils.file_utils -   TensorFlow version 2.2.0 available.\\n\",\n      
      \"05/16/2020 09:42:37 - INFO - transformers.modeling_utils -   Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading data\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:38 - INFO - nlp.utils.file_utils -   PyTorch version 1.6.0a0+bf2bbd9 available.\\n\",\n            \"05/16/2020 09:42:38 - INFO - nlp.utils.file_utils -   TensorFlow version 2.2.0 available.\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading done\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:38 - INFO - transformers.trainer -   You are instantiating a Trainer but W&B is not installed. 
To use wandb logging, run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface.\\n\",\n            \"05/16/2020 09:42:38 - INFO - transformers.modeling_utils -   Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading data\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:38 - INFO - nlp.utils.file_utils -   PyTorch version 1.6.0a0+bf2bbd9 available.\\n\",\n            \"05/16/2020 09:42:38 - INFO - nlp.utils.file_utils -   TensorFlow version 2.2.0 available.\\n\",\n            \"05/16/2020 09:42:38 - INFO - transformers.modeling_utils -   Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading data\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:38 - INFO - nlp.utils.file_utils -   PyTorch version 1.6.0a0+bf2bbd9 available.\\n\",\n            \"05/16/2020 09:42:38 - INFO - nlp.utils.file_utils -   TensorFlow version 2.2.0 available.\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading done\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:40 - INFO - transformers.trainer -   
You are instantiating a Trainer but W&B is not installed. To use wandb logging, run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface.\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading done\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:40 - INFO - transformers.trainer -   You are instantiating a Trainer but W&B is not installed. To use wandb logging, run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface.\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading done\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:40 - INFO - transformers.trainer -   You are instantiating a Trainer but W&B is not installed. To use wandb logging, run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface.\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading done\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:41 - INFO - transformers.trainer -   You are instantiating a Trainer but W&B is not installed. 
To use wandb logging, run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface.\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading done\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:41 - INFO - transformers.trainer -   You are instantiating a Trainer but W&B is not installed. To use wandb logging, run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface.\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading done\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:41 - INFO - transformers.trainer -   You are instantiating a Trainer but W&B is not installed. To use wandb logging, run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface.\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"loading done\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:42:41 - INFO - transformers.trainer -   You are instantiating a Trainer but W&B is not installed. 
To use wandb logging, run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface.\\n\",\n            \"05/16/2020 09:43:20 - INFO - transformers.trainer -   ***** Running training *****\\n\",\n            \"05/16/2020 09:43:20 - INFO - transformers.trainer -     Num examples = 87599\\n\",\n            \"05/16/2020 09:43:20 - INFO - transformers.trainer -     Num Epochs = 4\\n\",\n            \"05/16/2020 09:43:20 - INFO - transformers.trainer -     Instantaneous batch size per device = 8\\n\",\n            \"05/16/2020 09:43:20 - INFO - transformers.trainer -     Total train batch size (w. parallel, distributed & accumulation) = 64\\n\",\n            \"05/16/2020 09:43:20 - INFO - transformers.trainer -     Gradient Accumulation steps = 4\\n\",\n            \"05/16/2020 09:43:20 - INFO - transformers.trainer -     Total optimization steps = 1368\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"1d197b0946534a91b2eb4595baade174\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Epoch', max=4.0, style=ProgressStyle(description_width='i…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"a558cfceca8b43568dfad2267a534a35\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Iteration', max=1369.0, style=ProgressStyle(description_w…\"\n            ]\n          },\n       
   \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 09:43:33 - INFO - transformers.trainer -   ***** Running training *****\\n\",\n            \"05/16/2020 09:43:33 - INFO - transformers.trainer -     Num examples = 87599\\n\",\n            \"05/16/2020 09:43:33 - INFO - transformers.trainer -     Num Epochs = 4\\n\",\n            \"05/16/2020 09:43:33 - INFO - transformers.trainer -     Instantaneous batch size per device = 8\\n\",\n            \"05/16/2020 09:43:33 - INFO - transformers.trainer -     Total train batch size (w. parallel, distributed & accumulation) = 64\\n\",\n            \"05/16/2020 09:43:33 - INFO - transformers.trainer -     Gradient Accumulation steps = 4\\n\",\n            \"05/16/2020 09:43:33 - INFO - transformers.trainer -     Total optimization steps = 1368\\n\",\n            \"05/16/2020 09:43:36 - INFO - transformers.trainer -   ***** Running training *****\\n\",\n            \"05/16/2020 09:43:36 - INFO - transformers.trainer -     Num examples = 87599\\n\",\n            \"05/16/2020 09:43:36 - INFO - transformers.trainer -     Num Epochs = 4\\n\",\n            \"05/16/2020 09:43:36 - INFO - transformers.trainer -     Instantaneous batch size per device = 8\\n\",\n            \"05/16/2020 09:43:36 - INFO - transformers.trainer -     Total train batch size (w. 
parallel, distributed & accumulation) = 64\\n\",\n            \"05/16/2020 09:43:36 - INFO - transformers.trainer -     Gradient Accumulation steps = 4\\n\",\n            \"05/16/2020 09:43:36 - INFO - transformers.trainer -     Total optimization steps = 1368\\n\",\n            \"05/16/2020 09:43:40 - INFO - transformers.trainer -   ***** Running training *****\\n\",\n            \"05/16/2020 09:43:40 - INFO - transformers.trainer -     Num examples = 87599\\n\",\n            \"05/16/2020 09:43:40 - INFO - transformers.trainer -     Num Epochs = 4\\n\",\n            \"05/16/2020 09:43:40 - INFO - transformers.trainer -     Instantaneous batch size per device = 8\\n\",\n            \"05/16/2020 09:43:40 - INFO - transformers.trainer -     Total train batch size (w. parallel, distributed & accumulation) = 64\\n\",\n            \"05/16/2020 09:43:40 - INFO - transformers.trainer -     Gradient Accumulation steps = 4\\n\",\n            \"05/16/2020 09:43:40 - INFO - transformers.trainer -     Total optimization steps = 1368\\n\",\n            \"05/16/2020 09:43:44 - INFO - transformers.trainer -   ***** Running training *****\\n\",\n            \"05/16/2020 09:43:44 - INFO - transformers.trainer -     Num examples = 87599\\n\",\n            \"05/16/2020 09:43:44 - INFO - transformers.trainer -     Num Epochs = 4\\n\",\n            \"05/16/2020 09:43:44 - INFO - transformers.trainer -     Instantaneous batch size per device = 8\\n\",\n            \"05/16/2020 09:43:44 - INFO - transformers.trainer -     Total train batch size (w. 
parallel, distributed & accumulation) = 64\\n\",\n            \"05/16/2020 09:43:44 - INFO - transformers.trainer -     Gradient Accumulation steps = 4\\n\",\n            \"05/16/2020 09:43:44 - INFO - transformers.trainer -     Total optimization steps = 1368\\n\",\n            \"05/16/2020 09:43:45 - INFO - transformers.trainer -   ***** Running training *****\\n\",\n            \"05/16/2020 09:43:45 - INFO - transformers.trainer -     Num examples = 87599\\n\",\n            \"05/16/2020 09:43:45 - INFO - transformers.trainer -     Num Epochs = 4\\n\",\n            \"05/16/2020 09:43:45 - INFO - transformers.trainer -     Instantaneous batch size per device = 8\\n\",\n            \"05/16/2020 09:43:45 - INFO - transformers.trainer -     Total train batch size (w. parallel, distributed & accumulation) = 64\\n\",\n            \"05/16/2020 09:43:45 - INFO - transformers.trainer -     Gradient Accumulation steps = 4\\n\",\n            \"05/16/2020 09:43:45 - INFO - transformers.trainer -     Total optimization steps = 1368\\n\",\n            \"05/16/2020 09:43:46 - INFO - transformers.trainer -   ***** Running training *****\\n\",\n            \"05/16/2020 09:43:46 - INFO - transformers.trainer -     Num examples = 87599\\n\",\n            \"05/16/2020 09:43:46 - INFO - transformers.trainer -     Num Epochs = 4\\n\",\n            \"05/16/2020 09:43:46 - INFO - transformers.trainer -     Instantaneous batch size per device = 8\\n\",\n            \"05/16/2020 09:43:46 - INFO - transformers.trainer -     Total train batch size (w. 
parallel, distributed & accumulation) = 64\\n\",\n            \"05/16/2020 09:43:46 - INFO - transformers.trainer -     Gradient Accumulation steps = 4\\n\",\n            \"05/16/2020 09:43:46 - INFO - transformers.trainer -     Total optimization steps = 1368\\n\",\n            \"05/16/2020 09:43:47 - INFO - transformers.trainer -   ***** Running training *****\\n\",\n            \"05/16/2020 09:43:47 - INFO - transformers.trainer -     Num examples = 87599\\n\",\n            \"05/16/2020 09:43:47 - INFO - transformers.trainer -     Num Epochs = 4\\n\",\n            \"05/16/2020 09:43:47 - INFO - transformers.trainer -     Instantaneous batch size per device = 8\\n\",\n            \"05/16/2020 09:43:47 - INFO - transformers.trainer -     Total train batch size (w. parallel, distributed & accumulation) = 64\\n\",\n            \"05/16/2020 09:43:47 - INFO - transformers.trainer -     Gradient Accumulation steps = 4\\n\",\n            \"05/16/2020 09:43:47 - INFO - transformers.trainer -     Total optimization steps = 1368\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"2c4d7bdf7f6c48f0825ef87a12399f39\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Iteration', max=1369.0, style=ProgressStyle(description_w…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 10:03:44 - INFO - transformers.trainer -   \\n\",\n            \"\\n\",\n        
    \"Training completed. Do not forget to share your model on huggingface.co/models =)\\n\",\n            \"\\n\",\n            \"\\n\",\n            \"05/16/2020 10:03:44 - INFO - transformers.trainer -   \\n\",\n            \"\\n\",\n            \"Training completed. Do not forget to share your model on huggingface.co/models =)\\n\",\n            \"\\n\",\n            \"\\n\",\n            \"05/16/2020 10:03:44 - INFO - transformers.trainer -   \\n\",\n            \"\\n\",\n            \"Training completed. Do not forget to share your model on huggingface.co/models =)\\n\",\n            \"\\n\",\n            \"\\n\",\n            \"05/16/2020 10:03:44 - INFO - transformers.trainer -   \\n\",\n            \"\\n\",\n            \"Training completed. Do not forget to share your model on huggingface.co/models =)\\n\",\n            \"\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 10:03:44 - INFO - transformers.trainer -   \\n\",\n            \"\\n\",\n            \"Training completed. 
Do not forget to share your model on huggingface.co/models =)\\n\",\n            \"\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"a9a7aeae6fa74b67a349a69e845f8898\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Iteration', max=1369.0, style=ProgressStyle(description_w…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"c227333162114432a38b1480ec837701\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Iteration', max=1369.0, style=ProgressStyle(description_w…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"05/16/2020 10:03:44 - INFO - transformers.trainer -   \\n\",\n            \"\\n\",\n            \"Training completed. 
Do not forget to share your model on huggingface.co/models =)\\n\",\n            \"\\n\",\n            \"\\n\",\n            \"05/16/2020 10:03:44 - INFO - transformers.trainer -   \\n\",\n            \"\\n\",\n            \"Training completed. Do not forget to share your model on huggingface.co/models =)\\n\",\n            \"\\n\",\n            \"\\n\",\n            \"05/16/2020 10:03:44 - INFO - transformers.trainer -   Saving model checkpoint to ./models/tpu\\n\",\n            \"05/16/2020 10:03:45 - INFO - transformers.trainer -   Saving model checkpoint to ./models/tpu\\n\",\n            \"05/16/2020 10:03:45 - INFO - transformers.trainer -   Saving model checkpoint to ./models/tpu\\n\",\n            \"05/16/2020 10:03:45 - INFO - transformers.trainer -   Saving model checkpoint to ./models/tpu\\n\",\n            \"05/16/2020 10:03:44 - INFO - transformers.trainer -   \\n\",\n            \"\\n\",\n            \"Training completed. Do not forget to share your model on huggingface.co/models =)\\n\",\n            \"\\n\",\n            \"\\n\",\n            \"05/16/2020 10:03:45 - INFO - transformers.trainer -   Saving model checkpoint to ./models/tpu\\n\",\n            \"05/16/2020 10:03:45 - INFO - transformers.trainer -   Saving model checkpoint to ./models/tpu\\n\",\n            \"05/16/2020 10:03:45 - INFO - transformers.trainer -   Saving model checkpoint to ./models/tpu\\n\",\n            \"05/16/2020 10:03:45 - INFO - transformers.trainer -   Saving model checkpoint to ./models/tpu\\n\",\n            \"05/16/2020 10:03:45 - INFO - transformers.configuration_utils -   Configuration saved in ./models/tpu/config.json\\n\",\n            \"05/16/2020 10:03:54 - INFO - transformers.modeling_utils -   Model weights saved in ./models/tpu/pytorch_model.bin\\n\",\n            \"05/16/2020 10:03:54 - INFO - transformers.modeling_utils -   Model weights saved in ./models/tpu/pytorch_model.bin\\n\",\n            \"05/16/2020 10:03:54 - INFO - 
transformers.modeling_utils -   Model weights saved in ./models/tpu/pytorch_model.bin\\n\",\n            \"05/16/2020 10:03:54 - INFO - transformers.modeling_utils -   Model weights saved in ./models/tpu/pytorch_model.bin\\n\",\n            \"05/16/2020 10:03:54 - INFO - transformers.modeling_utils -   Model weights saved in ./models/tpu/pytorch_model.bin\\n\",\n            \"05/16/2020 10:03:54 - INFO - transformers.modeling_utils -   Model weights saved in ./models/tpu/pytorch_model.bin\\n\",\n            \"05/16/2020 10:03:54 - INFO - transformers.modeling_utils -   Model weights saved in ./models/tpu/pytorch_model.bin\\n\",\n            \"05/16/2020 10:03:55 - INFO - transformers.modeling_utils -   Model weights saved in ./models/tpu/pytorch_model.bin\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"LAe5vnbi-dyx\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"## Eval\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"VxWscyGVl05C\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"There are two gotchas here. First the metrics functionality in the nlp package is still work-in-progress so we will use the official squad evaluation script. Second, for some reason which I couldn't figure out, the `.generate` method is not working on TPU so will need to do prediction on CPU. For predicting the validation set it almost takes 40 mins.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"H8AbD1B7TR0k\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"## SQuAD evaluation script. 
Modifed slightly for this notebook\\n\",\n        \"\\n\",\n        \"from __future__ import print_function\\n\",\n        \"from collections import Counter\\n\",\n        \"import string\\n\",\n        \"import re\\n\",\n        \"import argparse\\n\",\n        \"import json\\n\",\n        \"import sys\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def normalize_answer(s):\\n\",\n        \"    \\\"\\\"\\\"Lower text and remove punctuation, articles and extra whitespace.\\\"\\\"\\\"\\n\",\n        \"    def remove_articles(text):\\n\",\n        \"        return re.sub(r'\\\\b(a|an|the)\\\\b', ' ', text)\\n\",\n        \"\\n\",\n        \"    def white_space_fix(text):\\n\",\n        \"        return ' '.join(text.split())\\n\",\n        \"\\n\",\n        \"    def remove_punc(text):\\n\",\n        \"        exclude = set(string.punctuation)\\n\",\n        \"        return ''.join(ch for ch in text if ch not in exclude)\\n\",\n        \"\\n\",\n        \"    def lower(text):\\n\",\n        \"        return text.lower()\\n\",\n        \"\\n\",\n        \"    return white_space_fix(remove_articles(remove_punc(lower(s))))\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def f1_score(prediction, ground_truth):\\n\",\n        \"    prediction_tokens = normalize_answer(prediction).split()\\n\",\n        \"    ground_truth_tokens = normalize_answer(ground_truth).split()\\n\",\n        \"    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)\\n\",\n        \"    num_same = sum(common.values())\\n\",\n        \"    if num_same == 0:\\n\",\n        \"        return 0\\n\",\n        \"    precision = 1.0 * num_same / len(prediction_tokens)\\n\",\n        \"    recall = 1.0 * num_same / len(ground_truth_tokens)\\n\",\n        \"    f1 = (2 * precision * recall) / (precision + recall)\\n\",\n        \"    return f1\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def exact_match_score(prediction, ground_truth):\\n\",\n        \"    return 
(normalize_answer(prediction) == normalize_answer(ground_truth))\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):\\n\",\n        \"    scores_for_ground_truths = []\\n\",\n        \"    for ground_truth in ground_truths:\\n\",\n        \"        score = metric_fn(prediction, ground_truth)\\n\",\n        \"        scores_for_ground_truths.append(score)\\n\",\n        \"    return max(scores_for_ground_truths)\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def evaluate(gold_answers, predictions):\\n\",\n        \"    f1 = exact_match = total = 0\\n\",\n        \"\\n\",\n        \"    for ground_truths, prediction in zip(gold_answers, predictions):\\n\",\n        \"      total += 1\\n\",\n        \"      exact_match += metric_max_over_ground_truths(\\n\",\n        \"                    exact_match_score, prediction, ground_truths)\\n\",\n        \"      f1 += metric_max_over_ground_truths(\\n\",\n        \"          f1_score, prediction, ground_truths)\\n\",\n        \"    \\n\",\n        \"    exact_match = 100.0 * exact_match / total\\n\",\n        \"    f1 = 100.0 * f1 / total\\n\",\n        \"\\n\",\n        \"    return {'exact_match': exact_match, 'f1': f1}\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"FCZVPIUK9fyn\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"import torch\\n\",\n        \"import torch_xla\\n\",\n        \"import torch_xla.core.xla_model as xm\\n\",\n        \"\\n\",\n        \"import nlp\\n\",\n        \"from transformers import T5ForConditionalGeneration, T5Tokenizer\\n\",\n        \"\\n\",\n        \"from tqdm.auto import tqdm\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"vmV9c-w39c7C\",\n   
     \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"model = T5ForConditionalGeneration.from_pretrained('models/tpu').to('cpu') # because its loaded on xla by default\\n\",\n        \"tokenizer = T5Tokenizer.from_pretrained('models/tpu')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"vjstID24-IAw\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"valid_dataset = torch.load('valid_data.pt')\\n\",\n        \"dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=32)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"v7TUzb-T-YtF\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"c4d045a2-fd25-476d-9461-68571523b861\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 66,\n          \"referenced_widgets\": [\n            \"df609e7f302e48949a49fae1b0deeb54\",\n            \"be907685a2b949d0a088a907a2cb2a90\",\n            \"743bba68cbe943d78ce63a1efd70d929\",\n            \"f3fffc31462b49e193b3c24d600b4ddb\",\n            \"87fa58101b24400da90e4b0f87c7cb1a\",\n            \"3c39d97054ba47a3ad3d9bb9a882756e\",\n            \"20855b3cb56944cfb0d6aa13bd06429a\",\n            \"779d80cc14104171ab40ff53170e4807\"\n          ]\n        }\n      },\n      \"source\": [\n        \"answers = []\\n\",\n        \"for batch in tqdm(dataloader):\\n\",\n        \"  outs = model.generate(input_ids=batch['input_ids'], \\n\",\n        \"                        attention_mask=batch['attention_mask'],\\n\",\n        \"                        max_length=16,\\n\",\n        \"                        early_stopping=True)\\n\",\n        \"  outs = [tokenizer.decode(ids) for ids in outs]\\n\",\n        \"  
answers.extend(outs)\"\n      ],\n      \"execution_count\": 24,\n      \"outputs\": [\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"df609e7f302e48949a49fae1b0deeb54\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, max=331.0), HTML(value='')))\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"VJ7CFrEtLD4F\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"predictions = []\\n\",\n        \"references = []\\n\",\n        \"for ref, pred in zip(valid_dataset, answers):\\n\",\n        \"  predictions.append(pred)\\n\",\n        \"  references.append(ref['answers']['text'])\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"BDpnF6NmMWEl\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"79f82ae2-8536-4e7c-f0c4-8a1fbcab8aa7\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"source\": [\n        \"predictions[0], references[0]\"\n      ],\n      \"execution_count\": 26,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"('Denver Broncos', ['Denver Broncos', 'Denver Broncos', 'Denver Broncos'])\"\n            ]\n          },\n          \"metadata\": {\n            
\"tags\": []\n          },\n          \"execution_count\": 26\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"dtRfqm3Odgh1\",\n        \"colab_type\": \"code\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        },\n        \"outputId\": \"7d6b5e09-63a6-415f-fb95-9485bb8f7fbc\"\n      },\n      \"source\": [\n        \"evaluate(references, predictions)\"\n      ],\n      \"execution_count\": 27,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"{'exact_match': 81.56102175969725, 'f1': 89.96016967193422}\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 27\n        }\n      ]\n    }\n  ]\n}"
  },
  {
    "path": "t5_fine_tuning.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"name\": \"t5_fine-tuning\",\n      \"provenance\": [],\n      \"collapsed_sections\": [\n        \"HVxGfmEMCKs_\",\n        \"RKNr7fgzcKpZ\",\n        \"vfhlYUUV2NIh\",\n        \"b3C13iabZvwK\",\n        \"qdEgCwL7cIyi\",\n        \"W4cfw8bMcNdA\",\n        \"brPOSAkjNP5t\",\n        \"Dhqigmiw2hVh\",\n        \"0B4IhzEgO21B\",\n        \"cANrUEXhO8QY\",\n        \"DEWi6c-pGZV9\",\n        \"GwdWdHG0RP5J\",\n        \"iq8M8nbTSJlE\",\n        \"vZ-YLmJyg64T\",\n        \"hOxk-ZoJmamm\",\n        \"aVfmE4O3Ku7H\",\n        \"AgNV3TMzqSvj\"\n      ],\n      \"machine_shape\": \"hm\",\n      \"include_colab_link\": true\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    },\n    \"accelerator\": \"GPU\",\n    \"widgets\": {\n      \"application/vnd.jupyter.widget-state+json\": {\n        \"7d8f60bfc0a248e58028b6e8a477a5f7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_72dc1e39b931429883e68c0603797896\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_cde60c5e18f04ba792fff8c2ac33f470\",\n              \"IPY_MODEL_c0c0df12695b4a1eacf8fa4ccc0ac62c\"\n            ]\n          }\n        },\n        \"72dc1e39b931429883e68c0603797896\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            
\"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"cde60c5e18f04ba792fff8c2ac33f470\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_72ea881ce3f445a9983d858b76dd257b\",\n            
\"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 791656,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 791656,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_d0f0c28a14b242f8990a547ed7f87c04\"\n          }\n        },\n        \"c0c0df12695b4a1eacf8fa4ccc0ac62c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_f97741534b554be3b5cdccd45c73b317\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 792k/792k [02:08&lt;00:00, 6.18kB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_1e70a3dc7090487fa883e932bff395cb\"\n          }\n        },\n        \"72ea881ce3f445a9983d858b76dd257b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": 
\"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"d0f0c28a14b242f8990a547ed7f87c04\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n    
        \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"f97741534b554be3b5cdccd45c73b317\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"1e70a3dc7090487fa883e932bff395cb\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": 
null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"f414bac332054c7f86af89b8e50c7d73\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_1d9c52a1bb8843b6b0f151571cbf30a4\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_ed039b8125714030b03912fb29a93ca4\",\n              \"IPY_MODEL_d9b445b8b3b04569adf22429259b4954\"\n            ]\n          }\n        },\n        \"1d9c52a1bb8843b6b0f151571cbf30a4\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            
\"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"ed039b8125714030b03912fb29a93ca4\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_6c61b3c76d7045eb825172ba51b3fa63\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 1199,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": 
\"1.5.0\",\n            \"value\": 1199,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_d11ffd1efc024c1ca86276430d29fd1e\"\n          }\n        },\n        \"d9b445b8b3b04569adf22429259b4954\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_22fac35d924f464ca0b33be21a566a86\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 1.20k/1.20k [00:20&lt;00:00, 58.3B/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_cfe128b0d2c648c18d2255b3f8506a09\"\n          }\n        },\n        \"6c61b3c76d7045eb825172ba51b3fa63\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        
\"d11ffd1efc024c1ca86276430d29fd1e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"22fac35d924f464ca0b33be21a566a86\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": 
\"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"cfe128b0d2c648c18d2255b3f8506a09\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            
\"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"c34ac6d2548249819c1eab28956edec4\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_de2c77b3fb0f4dba99f92062b2db5328\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_6ea23f0979824aac935f3f1ad10a86cd\",\n              \"IPY_MODEL_6452bc3b5ad445a8a5e272207fe4504d\"\n            ]\n          }\n        },\n        \"de2c77b3fb0f4dba99f92062b2db5328\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n  
          \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"6ea23f0979824aac935f3f1ad10a86cd\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_d6ef508766c54f8993d1d1f3d7cac040\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 891691430,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 891691430,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": 
\"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_1b69bbddeb244defab9e21690a45c79e\"\n          }\n        },\n        \"6452bc3b5ad445a8a5e272207fe4504d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_4a2b56fd6780470ab1574509fa432183\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 892M/892M [00:17&lt;00:00, 51.3MB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_3853231cd966465882a93fad9c5dc428\"\n          }\n        },\n        \"d6ef508766c54f8993d1d1f3d7cac040\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"1b69bbddeb244defab9e21690a45c79e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            
\"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"4a2b56fd6780470ab1574509fa432183\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": 
\"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"3853231cd966465882a93fad9c5dc428\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": 
null,\n            \"left\": null\n          }\n        },\n        \"915a0b65612243668570c555a47a6c37\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_c85b348624504af294b78de744969493\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_d56a6918840e4f6588af5da5f8f54015\",\n              \"IPY_MODEL_41db48cf488a4522b1f04b33c2261262\"\n            ]\n          }\n        },\n        \"c85b348624504af294b78de744969493\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": \"row wrap\",\n            \"width\": \"100%\",\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            
\"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": \"inline-flex\",\n            \"left\": null\n          }\n        },\n        \"d56a6918840e4f6588af5da5f8f54015\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_8c2d9ac8c22f486299949f4cbed16437\",\n            \"_dom_classes\": [],\n            \"description\": \"Validation sanity check: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"info\",\n            \"max\": 1,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_222974dba69145e7b171360bec239ba5\"\n          }\n        },\n        \"41db48cf488a4522b1f04b33c2261262\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n       
   \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_9e95200811bb497ab0ac0229f5e0ddaa\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 5/5 [00:01&lt;00:00,  3.24it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_3773b14f23974ad3a5bbb7ff947e68ca\"\n          }\n        },\n        \"8c2d9ac8c22f486299949f4cbed16437\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"222974dba69145e7b171360bec239ba5\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": 
null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": \"2\",\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"9e95200811bb497ab0ac0229f5e0ddaa\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"3773b14f23974ad3a5bbb7ff947e68ca\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          
\"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"3ec26f803d124dd0877e1ce0e3517f68\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            
\"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_aabb0b2f2ae64684a80f1ea39c9a7d1b\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_885696e0606c4353a5d21feec03aebc7\",\n              \"IPY_MODEL_659dd7302f3a40038834c4f1d8e59250\"\n            ]\n          }\n        },\n        \"aabb0b2f2ae64684a80f1ea39c9a7d1b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": \"row wrap\",\n            \"width\": \"100%\",\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            
\"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": \"inline-flex\",\n            \"left\": null\n          }\n        },\n        \"885696e0606c4353a5d21feec03aebc7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_6f3859c80aa945e4b4ae2aa957755b7c\",\n            \"_dom_classes\": [],\n            \"description\": \"Epoch 2: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 3125,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 3125,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_a840a738d20b4f43baf18453db53fdf0\"\n          }\n        },\n        \"659dd7302f3a40038834c4f1d8e59250\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_f7139c4e04374ffbafe6a849500c6369\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            
\"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 3125/3125 [54:28&lt;00:00,  1.05s/it, loss=0.003, v_num=0, val_loss=0.0874]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_ef8f0b7c9b0c4f829e3ad59e83cbdd67\"\n          }\n        },\n        \"6f3859c80aa945e4b4ae2aa957755b7c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"a840a738d20b4f43baf18453db53fdf0\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": 
null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": \"2\",\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"f7139c4e04374ffbafe6a849500c6369\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"ef8f0b7c9b0c4f829e3ad59e83cbdd67\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": 
\"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"dbe7a4854b8f420faaea8de4583fb1f0\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": 
\"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_4d1f674483d44e559ae1de553dd1d726\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_ce506c0137914e4db93b9db35154c62a\",\n              \"IPY_MODEL_e92a181ff64d4e0290236a91cbdb8d67\"\n            ]\n          }\n        },\n        \"4d1f674483d44e559ae1de553dd1d726\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": \"row wrap\",\n            \"width\": \"100%\",\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            
\"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": \"inline-flex\",\n            \"left\": null\n          }\n        },\n        \"ce506c0137914e4db93b9db35154c62a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_e8f7179c238e4d2d91d456b2c07e1b3e\",\n            \"_dom_classes\": [],\n            \"description\": \"Validating: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"info\",\n            \"max\": 1,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_e67100d71b5047158ab48ef0fd36cb99\"\n          }\n        },\n        \"e92a181ff64d4e0290236a91cbdb8d67\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_17f7e321de81404dabaa3e84fadce2cf\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 250/250 [00:52&lt;00:00,  4.79it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n        
    \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_a15e2fcc467242cb9fad5b2082a70c39\"\n          }\n        },\n        \"e8f7179c238e4d2d91d456b2c07e1b3e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"e67100d71b5047158ab48ef0fd36cb99\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": \"2\",\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            
\"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"17f7e321de81404dabaa3e84fadce2cf\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"a15e2fcc467242cb9fad5b2082a70c39\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            
\"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"f40c9bf16c9a473ba758a6439dce2652\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_8d17a251bf1440d4aa8513ad5f15ba1d\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_165319529b364183ae344a9a14f5bc52\",\n              
\"IPY_MODEL_3d0c08f3abbe421d83f2b35583221291\"\n            ]\n          }\n        },\n        \"8d17a251bf1440d4aa8513ad5f15ba1d\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": \"row wrap\",\n            \"width\": \"100%\",\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": \"inline-flex\",\n            \"left\": null\n          }\n        },\n        
\"165319529b364183ae344a9a14f5bc52\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_6e851577f682494c894b9afdd07b1201\",\n            \"_dom_classes\": [],\n            \"description\": \"Validating: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"info\",\n            \"max\": 1,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_e67e9e945a9c430f9844946cd81aae3a\"\n          }\n        },\n        \"3d0c08f3abbe421d83f2b35583221291\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_34fbc6e29df046faaedd9fe3230559cb\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 250/250 [00:53&lt;00:00,  4.71it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_bbbdd81a2e8f4d68b33d698f45ccc9ae\"\n          }\n        },\n        \"6e851577f682494c894b9afdd07b1201\": {\n          \"model_module\": 
\"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"e67e9e945a9c430f9844946cd81aae3a\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": \"2\",\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            
\"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"34fbc6e29df046faaedd9fe3230559cb\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"bbbdd81a2e8f4d68b33d698f45ccc9ae\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": 
null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"6aaf51cb9ad44c94b6a174a8768904f7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_51d23e1199274477a69557c74609afb2\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_029f74818c6842d7a28af62032418880\",\n              \"IPY_MODEL_8db144e9144141779a1088c4bc000a99\"\n            ]\n          }\n        },\n        \"51d23e1199274477a69557c74609afb2\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": 
\"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"029f74818c6842d7a28af62032418880\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_210517aede4f4cfab9120fdeb3d8361a\",\n            \"_dom_classes\": 
[],\n            \"description\": \"100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 782,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 782,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_df9bc2dc2b3c4fee98affdd7f5ca1ef6\"\n          }\n        },\n        \"8db144e9144141779a1088c4bc000a99\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_b684a47485af4cb1934d57cbb03a4f57\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 782/782 [10:38&lt;00:00,  1.22it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_942d20b134964d1d895af69938918464\"\n          }\n        },\n        \"210517aede4f4cfab9120fdeb3d8361a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            
\"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"df9bc2dc2b3c4fee98affdd7f5ca1ef6\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n  
          \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"b684a47485af4cb1934d57cbb03a4f57\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"942d20b134964d1d895af69938918464\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n  
          \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"0037bb8409bb4d65ac4ebd956fd1e631\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_db528e3117024014b4d281b650901cbd\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_350fc08aa59849fc9fd3f3e454583a6c\",\n              \"IPY_MODEL_be936dd408314d0d90a22f627ca517ca\"\n            ]\n          }\n        },\n        \"db528e3117024014b4d281b650901cbd\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n  
          \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"350fc08aa59849fc9fd3f3e454583a6c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_99f56e1a8fdb4b2282fa6e17819d044e\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 791656,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 791656,\n            
\"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_462bd815ddbc4687bcf7695f59919f0c\"\n          }\n        },\n        \"be936dd408314d0d90a22f627ca517ca\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_40edb7d92c1145ee9e3bb823e4688e16\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 792k/792k [00:06&lt;00:00, 131kB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_f827cd8a6bf846c590913c5ea40e6737\"\n          }\n        },\n        \"99f56e1a8fdb4b2282fa6e17819d044e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"462bd815ddbc4687bcf7695f59919f0c\": {\n          \"model_module\": 
\"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"40edb7d92c1145ee9e3bb823e4688e16\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            
\"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"f827cd8a6bf846c590913c5ea40e6737\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": 
\"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"61d58772a6a64c5c8ad30dab2563a56f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_4000e73e6d804763986dc9a9c74456aa\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_0dd99276ab294c939d83320f4674d5c2\",\n              \"IPY_MODEL_d306f7ff1ec94561aeed9ff59ba9b54b\"\n            ]\n          }\n        },\n        \"4000e73e6d804763986dc9a9c74456aa\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": \"row wrap\",\n            \"width\": \"100%\",\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n          
  \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": \"inline-flex\",\n            \"left\": null\n          }\n        },\n        \"0dd99276ab294c939d83320f4674d5c2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_0893a9730450433fa76a74b008a6f482\",\n            \"_dom_classes\": [],\n            \"description\": \"Validation sanity check: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"info\",\n            \"max\": 1,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": 
\"IPY_MODEL_f8873c7201e1410cb0ec52cb7e34c3c9\"\n          }\n        },\n        \"d306f7ff1ec94561aeed9ff59ba9b54b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_234eb8b041c44358b2f993c2853162f7\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 5/5 [00:01&lt;00:00,  3.74it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_8f73da698e85474fbecfd91bb7770c56\"\n          }\n        },\n        \"0893a9730450433fa76a74b008a6f482\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"f8873c7201e1410cb0ec52cb7e34c3c9\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": 
\"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": \"2\",\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"234eb8b041c44358b2f993c2853162f7\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            
\"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"8f73da698e85474fbecfd91bb7770c56\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n       
 \"26a0cb124049417aa9dbdd010e3af03a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_8a14bd8f2a424b15b48426fd5e320678\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_09ed6242c5ef4a4791a1074ff7e4616e\",\n              \"IPY_MODEL_487a6ea92fe0463ebbcb63094fde5136\"\n            ]\n          }\n        },\n        \"8a14bd8f2a424b15b48426fd5e320678\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": \"row wrap\",\n            \"width\": \"100%\",\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n   
         \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": \"inline-flex\",\n            \"left\": null\n          }\n        },\n        \"09ed6242c5ef4a4791a1074ff7e4616e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_c050be8414044acdb1a496495d148302\",\n            \"_dom_classes\": [],\n            \"description\": \"Epoch 2: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 2250,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 2250,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_56a67d534f284df0bc1121f1e264f5e2\"\n          }\n        },\n        \"487a6ea92fe0463ebbcb63094fde5136\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": 
\"IPY_MODEL_f168c4ae2d014e89bacc58e43427302e\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 2250/2250 [20:19&lt;00:00,  1.84it/s, loss=0.005, v_num=1, val_loss=0.0696]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_5cabe7d5ed6b46be882c558d28a29ca2\"\n          }\n        },\n        \"c050be8414044acdb1a496495d148302\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"56a67d534f284df0bc1121f1e264f5e2\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n    
        \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": \"2\",\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"f168c4ae2d014e89bacc58e43427302e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"5cabe7d5ed6b46be882c558d28a29ca2\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          
\"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"1681a9ce7f9340caa50c4204777a6f9e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": 
\"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_a9f0c66f958e493286155c8d2631d255\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_e04d6312d5d4425ab726588c485e668c\",\n              \"IPY_MODEL_fab8ee7d5d3940819eb9131efbbad791\"\n            ]\n          }\n        },\n        \"a9f0c66f958e493286155c8d2631d255\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": \"row wrap\",\n            \"width\": \"100%\",\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n    
        \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": \"inline-flex\",\n            \"left\": null\n          }\n        },\n        \"e04d6312d5d4425ab726588c485e668c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_6dd2781f88eb4549b4203dfec9c1a98e\",\n            \"_dom_classes\": [],\n            \"description\": \"Validating: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"info\",\n            \"max\": 1,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_893ba880ac6545baa6eb4a532ecc5753\"\n          }\n        },\n        \"fab8ee7d5d3940819eb9131efbbad791\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_d4fc7ae628c94a758ce694318bc620ba\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            
\"_model_module_version\": \"1.5.0\",\n            \"value\": \" 250/250 [00:48&lt;00:00,  5.24it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_4c33ca548b5e4738abdac09575e2a325\"\n          }\n        },\n        \"6dd2781f88eb4549b4203dfec9c1a98e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"893ba880ac6545baa6eb4a532ecc5753\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            
\"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": \"2\",\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"d4fc7ae628c94a758ce694318bc620ba\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"4c33ca548b5e4738abdac09575e2a325\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": 
\"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"ff475d6cdc074c14aa7b2cfede771b07\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": 
\"IPY_MODEL_d77faf8b9ea6480abe594114823ca52f\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_ee4f41b591fe41a5a2d915c343b16c1d\",\n              \"IPY_MODEL_d8946214acc44c4cb97688538daaa33f\"\n            ]\n          }\n        },\n        \"d77faf8b9ea6480abe594114823ca52f\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": \"row wrap\",\n            \"width\": \"100%\",\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n       
     \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": \"inline-flex\",\n            \"left\": null\n          }\n        },\n        \"ee4f41b591fe41a5a2d915c343b16c1d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_9b9306452732495cbb1acd3e2fcf3b69\",\n            \"_dom_classes\": [],\n            \"description\": \"Validating: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"info\",\n            \"max\": 1,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_f42e9e596ad0485b842fee92d1884750\"\n          }\n        },\n        \"d8946214acc44c4cb97688538daaa33f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_1d9f8718ba4d4b60997757ea7f1db72b\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 250/250 [00:48&lt;00:00,  5.22it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": 
\"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_63db466ae63b42a5a79d051ef5af653e\"\n          }\n        },\n        \"9b9306452732495cbb1acd3e2fcf3b69\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"f42e9e596ad0485b842fee92d1884750\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": \"2\",\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            
\"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"1d9f8718ba4d4b60997757ea7f1db72b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"63db466ae63b42a5a79d051ef5af653e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            
\"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"8933ab7f935e4776970ddfe35f5da135\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_84eb2bf17a9048fc94b6f47867d1b0ba\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_cdd7554792cf4c73922e2f050d1fcaaf\",\n              \"IPY_MODEL_a32aa193a82f478387c14f384c2c689e\"\n            ]\n          
}\n        },\n        \"84eb2bf17a9048fc94b6f47867d1b0ba\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"cdd7554792cf4c73922e2f050d1fcaaf\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n        
  \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_e4cbd76c110541cbbf1386e299c4d9d6\",\n            \"_dom_classes\": [],\n            \"description\": \"100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 63,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 63,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_da67548f1abc4727965f72b8cb367681\"\n          }\n        },\n        \"a32aa193a82f478387c14f384c2c689e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_63b11aa7ee0c4271aedb87ad3e7d23c3\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 63/63 [53:03&lt;00:00, 50.53s/it]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_720b90b3f86c4e5da15447777806e9a7\"\n          }\n        },\n        \"e4cbd76c110541cbbf1386e299c4d9d6\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            
\"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"da67548f1abc4727965f72b8cb367681\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n        
    \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"63b11aa7ee0c4271aedb87ad3e7d23c3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"720b90b3f86c4e5da15447777806e9a7\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            
\"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"78b1b91a08214461b74fb1e143247d1e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_902a509471004d2691d807c4990fccd2\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_74ec15497e1743a4af6be12e3bc1487d\",\n              \"IPY_MODEL_a70b457d9379403f9fac247de68bb8e3\"\n            ]\n          }\n        },\n        \"902a509471004d2691d807c4990fccd2\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": 
null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"74ec15497e1743a4af6be12e3bc1487d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_28f9d9aa0ece4831b0f9e412d8a88f8d\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n           
 \"bar_style\": \"success\",\n            \"max\": 791656,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 791656,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_7640680e1006492da75d873726567fed\"\n          }\n        },\n        \"a70b457d9379403f9fac247de68bb8e3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_1090e3e017564a2281c60fb53a901c75\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 792k/792k [00:04&lt;00:00, 191kB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_9df2679ba627444e9b76bd2ff0ddc657\"\n          }\n        },\n        \"28f9d9aa0ece4831b0f9e412d8a88f8d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": 
\"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"7640680e1006492da75d873726567fed\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        
},\n        \"1090e3e017564a2281c60fb53a901c75\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"9df2679ba627444e9b76bd2ff0ddc657\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n           
 \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"5c7427d7db844b9691d30cf2de1efc17\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_bb0df1833ee3489da5c2a9c7b1306cc6\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_3d2817812b6f475a8c838fd14646469a\",\n              \"IPY_MODEL_9d0f0c946790477fb8bc8bac64dfd7de\"\n            ]\n          }\n        },\n        \"bb0df1833ee3489da5c2a9c7b1306cc6\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            
\"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"3d2817812b6f475a8c838fd14646469a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_8254b8062d5e4280bea46f8bc444c5db\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 1199,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1199,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": 
\"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_ab5f07ab5c574148a0062eb7f1ce5bcd\"\n          }\n        },\n        \"9d0f0c946790477fb8bc8bac64dfd7de\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_47fdc2009efc443392ecd182996fcca9\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 1.20k/1.20k [00:42&lt;00:00, 28.4B/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_9b705e83fea84cbf912e33d6342be721\"\n          }\n        },\n        \"8254b8062d5e4280bea46f8bc444c5db\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"ab5f07ab5c574148a0062eb7f1ce5bcd\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            
\"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"47fdc2009efc443392ecd182996fcca9\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            
\"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"9b705e83fea84cbf912e33d6342be721\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            
\"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"e8e8ea6199df43019930ac7b557c46a5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_0566f29b017f47f399d7579d7929e046\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_932309f0a40b46659c0cac7cc37fdc05\",\n              \"IPY_MODEL_da3665141bd44a24a5b5c9f36d4a9c52\"\n            ]\n          }\n        },\n        \"0566f29b017f47f399d7579d7929e046\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            
\"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"932309f0a40b46659c0cac7cc37fdc05\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_5c98e3a5b6a6403a936a725f4c30cdd3\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 891691430,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 891691430,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_8da2b560fa9348098a2a7f09967d5f5f\"\n          }\n        },\n        \"da3665141bd44a24a5b5c9f36d4a9c52\": {\n    
      \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_7e37cac227014717987922341f8099fe\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 892M/892M [00:38&lt;00:00, 23.2MB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_b95f98f98a76434591f90d41b43e39ba\"\n          }\n        },\n        \"5c98e3a5b6a6403a936a725f4c30cdd3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"8da2b560fa9348098a2a7f09967d5f5f\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            
\"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"7e37cac227014717987922341f8099fe\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n     
   \"b95f98f98a76434591f90d41b43e39ba\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"8e79d03deee94b299431330441bd64c8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": 
\"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_510043ffee634f86b89ec3fc060a74ea\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_e86c5fbd48ce4215a0df353122183982\",\n              \"IPY_MODEL_bfc3a5a3cf2e49868053db6f1ef7785d\"\n            ]\n          }\n        },\n        \"510043ffee634f86b89ec3fc060a74ea\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": \"row wrap\",\n            \"width\": \"100%\",\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            
\"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": \"inline-flex\",\n            \"left\": null\n          }\n        },\n        \"e86c5fbd48ce4215a0df353122183982\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_361a2f79ed89495894d0b09a709f8f32\",\n            \"_dom_classes\": [],\n            \"description\": \"Validation sanity check: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"info\",\n            \"max\": 1,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_f7e53d55f0234627a3b9f2c90eb8682f\"\n          }\n        },\n        \"bfc3a5a3cf2e49868053db6f1ef7785d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_3584c01b0c5e47dfa373bae29461e94a\",\n            \"_dom_classes\": [],\n            
\"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 5/5 [00:01&lt;00:00,  3.50it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_cfd9db6f31474a8189e741bf8fdad6a9\"\n          }\n        },\n        \"361a2f79ed89495894d0b09a709f8f32\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"f7e53d55f0234627a3b9f2c90eb8682f\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            
\"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": \"2\",\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"3584c01b0c5e47dfa373bae29461e94a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"cfd9db6f31474a8189e741bf8fdad6a9\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n    
        \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"68705cee3df5458fb5145046337d925c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            
\"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_4cf1613d58bd450780ac95c994686985\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_3ee5f7cf56394175900ebb14ae0b5f9e\",\n              \"IPY_MODEL_9f054dcf926c45459b7aa728493571a0\"\n            ]\n          }\n        },\n        \"4cf1613d58bd450780ac95c994686985\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": \"row wrap\",\n            \"width\": \"100%\",\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n         
   \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": \"inline-flex\",\n            \"left\": null\n          }\n        },\n        \"3ee5f7cf56394175900ebb14ae0b5f9e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_b52599dda9d94c83891d1c42c5f557e0\",\n            \"_dom_classes\": [],\n            \"description\": \"Epoch 3:   3%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"danger\",\n            \"max\": 11694,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 396,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_a1cf907a3bcc4177b1d5dd9edbf30c20\"\n          }\n        },\n        \"9f054dcf926c45459b7aa728493571a0\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_82b29ceeb21c417782e9e29a81eb47ea\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 396/11694 [03:40&lt;1:44:57,  1.79it/s, loss=0.017, v_num=0, 
val_loss=0.327]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_886260804ffd4e11bc93fb6e098111ab\"\n          }\n        },\n        \"b52599dda9d94c83891d1c42c5f557e0\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"a1cf907a3bcc4177b1d5dd9edbf30c20\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": 
\"2\",\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"82b29ceeb21c417782e9e29a81eb47ea\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"886260804ffd4e11bc93fb6e098111ab\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            
\"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"69f6eb1cb0434128961b5d83529813c5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_6723d50588a248d0ad7bb118de8c3fd5\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            
\"children\": [\n              \"IPY_MODEL_86d71b8233c14252a897ffa29ea6d9df\",\n              \"IPY_MODEL_d01c708e22ab423896271fa79860e7c3\"\n            ]\n          }\n        },\n        \"6723d50588a248d0ad7bb118de8c3fd5\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": \"row wrap\",\n            \"width\": \"100%\",\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            
\"display\": \"inline-flex\",\n            \"left\": null\n          }\n        },\n        \"86d71b8233c14252a897ffa29ea6d9df\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_0e8da5995754472fac5fba1f8b30d107\",\n            \"_dom_classes\": [],\n            \"description\": \"Validating: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"info\",\n            \"max\": 1,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_3dbee77f299f4e14a1698b60d609b8a1\"\n          }\n        },\n        \"d01c708e22ab423896271fa79860e7c3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_8c4c9025aaae44148591ae6f8bb37347\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 2501/2501 [07:28&lt;00:00,  5.90it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_29e2f2f0914e4dea8117844675b42be5\"\n          }\n      
  },\n        \"0e8da5995754472fac5fba1f8b30d107\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"3dbee77f299f4e14a1698b60d609b8a1\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": \"2\",\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n        
    \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"8c4c9025aaae44148591ae6f8bb37347\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"29e2f2f0914e4dea8117844675b42be5\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n   
         \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"0cfc8fa73f164b4fa5ddcbc3f115ef9b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_4559bd35b33f4804b968debaaf316463\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_e403cc7718bf48f1b95150482e083f02\",\n              \"IPY_MODEL_f6248a9db7f2466a9ab3a4fbd214f265\"\n            ]\n          }\n        },\n        \"4559bd35b33f4804b968debaaf316463\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": 
\"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": \"row wrap\",\n            \"width\": \"100%\",\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": \"inline-flex\",\n            \"left\": null\n          }\n        },\n        \"e403cc7718bf48f1b95150482e083f02\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n         
   \"style\": \"IPY_MODEL_475e5353d31147d3ab156c0e7835684c\",\n            \"_dom_classes\": [],\n            \"description\": \"Validating: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"info\",\n            \"max\": 1,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_c3f65d683c6e4fe18e31ecc305f8d455\"\n          }\n        },\n        \"f6248a9db7f2466a9ab3a4fbd214f265\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_9b50abad66b44022aa389bc3f312db6b\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 2501/2501 [07:25&lt;00:00,  5.90it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_762b2941ff3e47d89b6e6ce4350bc058\"\n          }\n        },\n        \"475e5353d31147d3ab156c0e7835684c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": 
\"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"c3f65d683c6e4fe18e31ecc305f8d455\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": \"2\",\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": 
null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"9b50abad66b44022aa389bc3f312db6b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"762b2941ff3e47d89b6e6ce4350bc058\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            
\"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"1597779d89464892885045be715890a8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_8a42468ed6b945e8bfce1803f3ea4452\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_f87eae824cf1492b9555b78648a9f261\",\n              \"IPY_MODEL_6cd0d574b5fd43588b8d492674125218\"\n            ]\n          }\n        },\n        \"8a42468ed6b945e8bfce1803f3ea4452\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            
\"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"f87eae824cf1492b9555b78648a9f261\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_17b25142ac744ba882e2bbd1f42c1db2\",\n            \"_dom_classes\": [],\n            \"description\": \"100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 626,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n    
        \"_model_module_version\": \"1.5.0\",\n            \"value\": 626,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_09185d325ef84c1fad7b07fbd9eeed31\"\n          }\n        },\n        \"6cd0d574b5fd43588b8d492674125218\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_ba31765789dc46229493674dab21921d\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 626/626 [06:35&lt;00:00,  1.58it/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_a9dd88fb73374e108482b80993b998eb\"\n          }\n        },\n        \"17b25142ac744ba882e2bbd1f42c1db2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n    
    \"09185d325ef84c1fad7b07fbd9eeed31\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"ba31765789dc46229493674dab21921d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": 
\"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"a9dd88fb73374e108482b80993b998eb\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            
\"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        }\n      }\n    }\n  },\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"view-in-github\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"<a href=\\\"https://colab.research.google.com/github/patil-suraj/exploring-T5/blob/master/t5_fine_tuning.ipynb\\\" target=\\\"_parent\\\"><img src=\\\"https://colab.research.google.com/assets/colab-badge.svg\\\" alt=\\\"Open In Colab\\\"/></a>\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"PJX4vkjj6wYz\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"83a8a420-48cd-4d49-bc60-2693268481c6\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 121\n        }\n      },\n      \"source\": [\n        \"from google.colab import drive\\n\",\n        \"drive.mount('/content/drive')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\\n\",\n            \"\\n\",\n            \"Enter your authorization code:\\n\",\n            \"··········\\n\",\n            
\"Mounted at /content/drive\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"1V5cInhu42Wk\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"5501a5f1-fc49-4df7-f7a0-31cc37647337\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 302\n        }\n      },\n      \"source\": [\n        \"!nvidia-smi\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Sat May  9 15:05:25 2020       \\n\",\n            \"+-----------------------------------------------------------------------------+\\n\",\n            \"| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |\\n\",\n            \"|-------------------------------+----------------------+----------------------+\\n\",\n            \"| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\\n\",\n            \"| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\\n\",\n            \"|===============================+======================+======================|\\n\",\n            \"|   0  Tesla P100-PCIE...  
Off  | 00000000:00:04.0 Off |                    0 |\n",\n            "| N/A   34C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |\n",\n            "+-------------------------------+----------------------+----------------------+\n",\n            "                                                                               \n",\n            "+-----------------------------------------------------------------------------+\n",\n            "| Processes:                                                       GPU Memory |\n",\n            "|  GPU       PID   Type   Process name                             Usage      |\n",\n            "|=============================================================================|\n",\n            "|  No running processes found                                                 |\n",\n            "+-----------------------------------------------------------------------------+\n"\n          ],\n          "name": "stdout"\n        }\n      ]\n    },\n    {\n      "cell_type": "markdown",\n      "metadata": {\n        "id": "epWcPHhJ7v7j",\n        "colab_type": "text"\n      },\n      "source": [\n        "Install apex if you want to do 16-bit training. 
You'll probably need to restart the notebook after installing apex\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"k1Xy7ZG-7gHt\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"# !export CUDA_HOME=/usr/local/cuda-10.1\\n\",\n        \"# !git clone https://github.com/NVIDIA/apex\\n\",\n        \"# !pip install -v --no-cache-dir --global-option=\\\"--cpp_ext\\\" --global-option=\\\"--cuda_ext\\\" ./apex\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"SDVQ04fGRb1v\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"11689986-ca27-4ab0-f14d-5ee4f0eba40d\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 1000\n        }\n      },\n      \"source\": [\n        \"!pip install transformers\\n\",\n        \"!pip install pytorch_lightning\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Collecting transformers\\n\",\n            \"\\u001b[?25l  Downloading https://files.pythonhosted.org/packages/cd/38/c9527aa055241c66c4d785381eaf6f80a28c224cae97daa1f8b183b5fabb/transformers-2.9.0-py3-none-any.whl (635kB)\\n\",\n            \"\\r\\u001b[K     |▌                               | 10kB 20.5MB/s eta 0:00:01\\r\\u001b[K     |█                               | 20kB 1.7MB/s eta 0:00:01\\r\\u001b[K     |█▌                              | 30kB 2.3MB/s eta 0:00:01\\r\\u001b[K     |██                              | 40kB 2.6MB/s eta 0:00:01\\r\\u001b[K     |██▋                             | 51kB 2.0MB/s eta 0:00:01\\r\\u001b[K     |███                             | 61kB 2.3MB/s eta 0:00:01\\r\\u001b[K     |███▋                            | 71kB 2.5MB/s eta 0:00:01\\r\\u001b[K     |████▏                           | 81kB 
2.7MB/s eta 0:00:01\\r\\u001b[K     |████▋                           | 92kB 3.0MB/s eta 0:00:01\\r\\u001b[K     |█████▏                          | 102kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |█████▊                          | 112kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████▏                         | 122kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████▊                         | 133kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████▏                        | 143kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████▊                        | 153kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████▎                       | 163kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████▊                       | 174kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |█████████▎                      | 184kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |█████████▉                      | 194kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████████▎                     | 204kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████████▉                     | 215kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████████▍                    | 225kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████████▉                    | 235kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████████▍                   | 245kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████████▉                   | 256kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |█████████████▍                  | 266kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████████████                  | 276kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████████████▍                 | 286kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████████████                 | 296kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████████████▌                | 307kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████████████                | 317kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████████████▌               | 327kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |█████████████████               | 337kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |█████████████████▌              | 
348kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████████████████              | 358kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████████████████▋             | 368kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████████████████             | 378kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████████████████▋            | 389kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████████████████            | 399kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████████████████▋           | 409kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |█████████████████████▏          | 419kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |█████████████████████▋          | 430kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████████████████████▏         | 440kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████████████████████▊         | 450kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████████████████████▏        | 460kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████████████████████▊        | 471kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████████████████████▎       | 481kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████████████████████▊       | 491kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |█████████████████████████▎      | 501kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |█████████████████████████▊      | 512kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████████████████████████▎     | 522kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████████████████████████▉     | 532kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████████████████████████▎    | 542kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████████████████████████▉    | 552kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████████████████████████▍   | 563kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████████████████████████▉   | 573kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |█████████████████████████████▍  | 583kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████████████████████████████  | 593kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |██████████████████████████████▍ | 604kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████████████████████████████ 
| 614kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |███████████████████████████████▍| 624kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████████████████████████████| 634kB 2.8MB/s eta 0:00:01\\r\\u001b[K     |████████████████████████████████| 645kB 2.8MB/s \\n\",\n            \"\\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)\\n\",\n            \"Requirement already satisfied: dataclasses; python_version < \\\"3.7\\\" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7)\\n\",\n            \"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)\\n\",\n            \"Collecting tokenizers==0.7.0\\n\",\n            \"\\u001b[?25l  Downloading https://files.pythonhosted.org/packages/14/e5/a26eb4716523808bb0a799fcfdceb6ebf77a18169d9591b2f46a9adb87d9/tokenizers-0.7.0-cp36-cp36m-manylinux1_x86_64.whl (3.8MB)\\n\",\n            \"\\u001b[K     |████████████████████████████████| 3.8MB 12.8MB/s \\n\",\n            \"\\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)\\n\",\n            \"Collecting sacremoses\\n\",\n            \"\\u001b[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\\n\",\n            \"\\u001b[K     |████████████████████████████████| 890kB 28.2MB/s \\n\",\n            \"\\u001b[?25hCollecting sentencepiece\\n\",\n            \"\\u001b[?25l  Downloading https://files.pythonhosted.org/packages/98/2c/8df20f3ac6c22ac224fff307ebc102818206c53fc454ecd37d8ac2060df5/sentencepiece-0.1.86-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)\\n\",\n            \"\\u001b[K     |████████████████████████████████| 1.0MB 42.6MB/s \\n\",\n            \"\\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from 
transformers) (1.18.4)\\n\",\n            \"Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)\\n\",\n            \"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)\\n\",\n            \"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)\\n\",\n            \"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.4.5.1)\\n\",\n            \"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.9)\\n\",\n            \"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (1.12.0)\\n\",\n            \"Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)\\n\",\n            \"Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.14.1)\\n\",\n            \"Building wheels for collected packages: sacremoses\\n\",\n            \"  Building wheel for sacremoses (setup.py) ... 
\\u001b[?25l\\u001b[?25hdone\\n\",\n            \"  Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893260 sha256=1d6422ddbf7526c5762d09193b36548e7b07bf9cc526057f833254f31a68c87c\\n\",\n            \"  Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\\n\",\n            \"Successfully built sacremoses\\n\",\n            \"Installing collected packages: tokenizers, sacremoses, sentencepiece, transformers\\n\",\n            \"Successfully installed sacremoses-0.0.43 sentencepiece-0.1.86 tokenizers-0.7.0 transformers-2.9.0\\n\",\n            \"Collecting pytorch_lightning\\n\",\n            \"\\u001b[?25l  Downloading https://files.pythonhosted.org/packages/75/ac/ac03f1f3fa950d96ca52f07d33fdbf5add05f164c1ac4eae179231dfa93d/pytorch_lightning-0.7.5-py3-none-any.whl (233kB)\\n\",\n            \"\\u001b[K     |████████████████████████████████| 235kB 2.8MB/s \\n\",\n            \"\\u001b[?25hRequirement already satisfied: numpy>=1.16.4 in /usr/local/lib/python3.6/dist-packages (from pytorch_lightning) (1.18.4)\\n\",\n            \"Requirement already satisfied: tqdm>=4.41.0 in /usr/local/lib/python3.6/dist-packages (from pytorch_lightning) (4.41.1)\\n\",\n            \"Collecting future>=0.17.1\\n\",\n            \"\\u001b[?25l  Downloading https://files.pythonhosted.org/packages/45/0b/38b06fd9b92dc2b68d58b75f900e97884c45bedd2ff83203d933cf5851c9/future-0.18.2.tar.gz (829kB)\\n\",\n            \"\\u001b[K     |████████████████████████████████| 829kB 8.4MB/s \\n\",\n            \"\\u001b[?25hRequirement already satisfied: tensorboard>=1.14 in /usr/local/lib/python3.6/dist-packages (from pytorch_lightning) (2.2.1)\\n\",\n            \"Requirement already satisfied: torch>=1.1 in /usr/local/lib/python3.6/dist-packages (from pytorch_lightning) (1.5.0+cu101)\\n\",\n            \"Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from 
tensorboard>=1.14->pytorch_lightning) (1.12.0)\\n\",\n            \"Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.6.0.post3)\\n\",\n            \"Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.7.2)\\n\",\n            \"Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (3.2.1)\\n\",\n            \"Requirement already satisfied: wheel>=0.26; python_version >= \\\"3\\\" in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (0.34.2)\\n\",\n            \"Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (0.4.1)\\n\",\n            \"Requirement already satisfied: grpcio>=1.24.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.28.1)\\n\",\n            \"Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (3.10.0)\\n\",\n            \"Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (0.9.0)\\n\",\n            \"Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (2.23.0)\\n\",\n            \"Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (46.1.3)\\n\",\n            \"Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard>=1.14->pytorch_lightning) (1.0.1)\\n\",\n            \"Requirement already satisfied: pyasn1-modules>=0.2.1 in 
/usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=1.14->pytorch_lightning) (0.2.8)\\n\",\n            \"Requirement already satisfied: cachetools<3.2,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=1.14->pytorch_lightning) (3.1.1)\\n\",\n            \"Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=1.14->pytorch_lightning) (4.0)\\n\",\n            \"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard>=1.14->pytorch_lightning) (1.3.0)\\n\",\n            \"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard>=1.14->pytorch_lightning) (2.9)\\n\",\n            \"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard>=1.14->pytorch_lightning) (2020.4.5.1)\\n\",\n            \"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard>=1.14->pytorch_lightning) (1.24.3)\\n\",\n            \"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard>=1.14->pytorch_lightning) (3.0.4)\\n\",\n            \"Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard>=1.14->pytorch_lightning) (0.4.8)\\n\",\n            \"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard>=1.14->pytorch_lightning) (3.1.0)\\n\",\n            \"Building wheels for collected packages: future\\n\",\n            \"  Building wheel 
for future (setup.py) ... \u001b[?25l\u001b[?25hdone\n",\n            "  Created wheel for future: filename=future-0.18.2-cp36-none-any.whl size=491057 sha256=2748e4b7f9acd3e1e87b8118cdcb4cb5a4bf5ed682d99f3866e26265ab336042\n",\n            "  Stored in directory: /root/.cache/pip/wheels/8b/99/a0/81daf51dcd359a9377b110a8a886b3895921802d2fc1b2397e\n",\n            "Successfully built future\n",\n            "Installing collected packages: future, pytorch-lightning\n",\n            "  Found existing installation: future 0.16.0\n",\n            "    Uninstalling future-0.16.0:\n",\n            "      Successfully uninstalled future-0.16.0\n",\n            "Successfully installed future-0.18.2 pytorch-lightning-0.7.5\n"\n          ],\n          "name": "stdout"\n        }\n      ]\n    },\n    {\n      "cell_type": "markdown",\n      "metadata": {\n        "id": "HVxGfmEMCKs_",\n        "colab_type": "text"\n      },\n      "source": [\n        "## T5 fine-tuning\n",\n        "\n",\n        "This notebook is to showcase how to fine-tune the [T5 model](https://arxiv.org/abs/1910.10683) with Hugging Face's [Transformers](https://github.com/huggingface/transformers/) to solve different NLP tasks using the text-2-text approach proposed in the T5 paper. 
For the demo I chose 3 non-text-2-text problems just to reiterate the point from the paper about how widely applicable this text-2-text framework is and how it can be used for different tasks without changing the model at all.\n",\n        "\n",\n        "This is a rough draft, so if you find any issues with this notebook or have any questions, reach out to me via [Twitter](https://twitter.com/psuraj28).\n",\n        "\n"\n      ]\n    },\n    {\n      "cell_type": "code",\n      "metadata": {\n        "id": "HS8mNXq6bdxq",\n        "colab_type": "code",\n        "outputId": "b0a32f10-f2ef-4d49-b433-266e8206040b",\n        "colab": {\n          "base_uri": "https://localhost:8080/",\n          "height": 84\n        }\n      },\n      "source": [\n        "import argparse\n",\n        "import glob\n",\n        "import os\n",\n        "import json\n",\n        "import time\n",\n        "import logging\n",\n        "import random\n",\n        "import re\n",\n        "from itertools import chain\n",\n        "from string import punctuation\n",\n        "\n",\n        "import nltk\n",\n        "nltk.download('punkt')\n",\n        "from nltk.tokenize import sent_tokenize\n",\n        "\n",\n        "import pandas as pd\n",\n        "import numpy as np\n",\n        "import torch\n",\n        "from torch.utils.data import Dataset, DataLoader\n",\n        "import pytorch_lightning as pl\n",\n        "\n",\n        "\n",\n        "from transformers import (\n",\n        "    AdamW,\n",\n        "    T5ForConditionalGeneration,\n",\n        "    T5Tokenizer,\n",\n        "    get_linear_schedule_with_warmup\n",\n        ")"\n      ],\n      "execution_count": 0,\n      "outputs": [\n        {\n          "output_type": "stream",\n          "text": [\n            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",\n            
"[nltk_data]   Unzipping tokenizers/punkt.zip.\n"\n          ],\n          "name": "stdout"\n        },\n        {\n          "output_type": "stream",\n          "text": [\n            "INFO:transformers.file_utils:PyTorch version 1.5.0+cu101 available.\n",\n            "INFO:transformers.file_utils:TensorFlow version 2.2.0-rc4 available.\n"\n          ],\n          "name": "stderr"\n        }\n      ]\n    },\n    {\n      "cell_type": "code",\n      "metadata": {\n        "id": "IswYuhWaz7QJ",\n        "colab_type": "code",\n        "colab": {}\n      },\n      "source": [\n        "def set_seed(seed):\n",\n        "  random.seed(seed)\n",\n        "  np.random.seed(seed)\n",\n        "  torch.manual_seed(seed)\n",\n        "  if torch.cuda.is_available():\n",\n        "    torch.cuda.manual_seed_all(seed)\n",\n        "\n",\n        "set_seed(42)"\n      ],\n      "execution_count": 0,\n      "outputs": []\n    },\n    {\n      "cell_type": "markdown",\n      "metadata": {\n        "id": "RKNr7fgzcKpZ",\n        "colab_type": "text"\n      },\n      "source": [\n        "## Model\n",\n        "\n",\n        "We'll be using the awesome [pytorch-lightning](https://github.com/PytorchLightning/pytorch-lightning) library for training. Most of the below code is adapted from here https://github.com/huggingface/transformers/blob/master/examples/lightning_base.py\n",\n        "\n",\n        "The trainer is generic and can be used for any text-2-text task. You'll just need to change the dataset. The rest of the code will stay unchanged for all the tasks.\n",\n        "\n",\n        "This is the most interesting and powerful thing about the text-2-text format. You can fine-tune the model on a variety of NLP tasks by just formulating the problem in a text-2-text setting. No need to change hyperparameters, learning rate, optimizer or loss function. 
Just plug in your dataset and you are ready to go!\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"B7uVNBtXST5X\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"class T5FineTuner(pl.LightningModule):\\n\",\n        \"  def __init__(self, hparams):\\n\",\n        \"    super(T5FineTuner, self).__init__()\\n\",\n        \"    self.hparams = hparams\\n\",\n        \"    \\n\",\n        \"    self.model = T5ForConditionalGeneration.from_pretrained(hparams.model_name_or_path)\\n\",\n        \"    self.tokenizer = T5Tokenizer.from_pretrained(hparams.tokenizer_name_or_path)\\n\",\n        \"  \\n\",\n        \"  def is_logger(self):\\n\",\n        \"    return self.trainer.proc_rank <= 0\\n\",\n        \"  \\n\",\n        \"  def forward(\\n\",\n        \"      self, input_ids, attention_mask=None, decoder_input_ids=None, decoder_attention_mask=None, lm_labels=None\\n\",\n        \"  ):\\n\",\n        \"    return self.model(\\n\",\n        \"        input_ids,\\n\",\n        \"        attention_mask=attention_mask,\\n\",\n        \"        decoder_input_ids=decoder_input_ids,\\n\",\n        \"        decoder_attention_mask=decoder_attention_mask,\\n\",\n        \"        lm_labels=lm_labels,\\n\",\n        \"    )\\n\",\n        \"\\n\",\n        \"  def _step(self, batch):\\n\",\n        \"    lm_labels = batch[\\\"target_ids\\\"]\\n\",\n        \"    lm_labels[lm_labels[:, :] == self.tokenizer.pad_token_id] = -100\\n\",\n        \"\\n\",\n        \"    outputs = self(\\n\",\n        \"        input_ids=batch[\\\"source_ids\\\"],\\n\",\n        \"        attention_mask=batch[\\\"source_mask\\\"],\\n\",\n        \"        lm_labels=lm_labels,\\n\",\n        \"        decoder_attention_mask=batch['target_mask']\\n\",\n        \"    )\\n\",\n        \"\\n\",\n        \"    loss = outputs[0]\\n\",\n        \"\\n\",\n        \"    return loss\\n\",\n        \"\\n\",\n    
    \"  def training_step(self, batch, batch_idx):\\n\",\n        \"    loss = self._step(batch)\\n\",\n        \"\\n\",\n        \"    tensorboard_logs = {\\\"train_loss\\\": loss}\\n\",\n        \"    return {\\\"loss\\\": loss, \\\"log\\\": tensorboard_logs}\\n\",\n        \"  \\n\",\n        \"  def training_epoch_end(self, outputs):\\n\",\n        \"    avg_train_loss = torch.stack([x[\\\"loss\\\"] for x in outputs]).mean()\\n\",\n        \"    tensorboard_logs = {\\\"avg_train_loss\\\": avg_train_loss}\\n\",\n        \"    return {\\\"avg_train_loss\\\": avg_train_loss, \\\"log\\\": tensorboard_logs, 'progress_bar': tensorboard_logs}\\n\",\n        \"\\n\",\n        \"  def validation_step(self, batch, batch_idx):\\n\",\n        \"    loss = self._step(batch)\\n\",\n        \"    return {\\\"val_loss\\\": loss}\\n\",\n        \"  \\n\",\n        \"  def validation_epoch_end(self, outputs):\\n\",\n        \"    avg_loss = torch.stack([x[\\\"val_loss\\\"] for x in outputs]).mean()\\n\",\n        \"    tensorboard_logs = {\\\"val_loss\\\": avg_loss}\\n\",\n        \"    return {\\\"avg_val_loss\\\": avg_loss, \\\"log\\\": tensorboard_logs, 'progress_bar': tensorboard_logs}\\n\",\n        \"\\n\",\n        \"  def configure_optimizers(self):\\n\",\n        \"    \\\"Prepare optimizer and schedule (linear warmup and decay)\\\"\\n\",\n        \"\\n\",\n        \"    model = self.model\\n\",\n        \"    no_decay = [\\\"bias\\\", \\\"LayerNorm.weight\\\"]\\n\",\n        \"    optimizer_grouped_parameters = [\\n\",\n        \"        {\\n\",\n        \"            \\\"params\\\": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],\\n\",\n        \"            \\\"weight_decay\\\": self.hparams.weight_decay,\\n\",\n        \"        },\\n\",\n        \"        {\\n\",\n        \"            \\\"params\\\": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],\\n\",\n        \"            \\\"weight_decay\\\": 
0.0,\\n\",\n        \"        },\\n\",\n        \"    ]\\n\",\n        \"    optimizer = AdamW(optimizer_grouped_parameters, lr=self.hparams.learning_rate, eps=self.hparams.adam_epsilon)\\n\",\n        \"    self.opt = optimizer\\n\",\n        \"    return [optimizer]\\n\",\n        \"  \\n\",\n        \"  def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, second_order_closure=None):\\n\",\n        \"    if self.trainer.use_tpu:\\n\",\n        \"      xm.optimizer_step(optimizer)\\n\",\n        \"    else:\\n\",\n        \"      optimizer.step()\\n\",\n        \"    optimizer.zero_grad()\\n\",\n        \"    self.lr_scheduler.step()\\n\",\n        \"  \\n\",\n        \"  def get_tqdm_dict(self):\\n\",\n        \"    tqdm_dict = {\\\"loss\\\": \\\"{:.3f}\\\".format(self.trainer.avg_loss), \\\"lr\\\": self.lr_scheduler.get_last_lr()[-1]}\\n\",\n        \"\\n\",\n        \"    return tqdm_dict\\n\",\n        \"\\n\",\n        \"  def train_dataloader(self):\\n\",\n        \"    train_dataset = get_dataset(tokenizer=self.tokenizer, type_path=\\\"train\\\", args=self.hparams)\\n\",\n        \"    dataloader = DataLoader(train_dataset, batch_size=self.hparams.train_batch_size, drop_last=True, shuffle=True, num_workers=4)\\n\",\n        \"    t_total = (\\n\",\n        \"        (len(dataloader.dataset) // (self.hparams.train_batch_size * max(1, self.hparams.n_gpu)))\\n\",\n        \"        // self.hparams.gradient_accumulation_steps\\n\",\n        \"        * float(self.hparams.num_train_epochs)\\n\",\n        \"    )\\n\",\n        \"    scheduler = get_linear_schedule_with_warmup(\\n\",\n        \"        self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=t_total\\n\",\n        \"    )\\n\",\n        \"    self.lr_scheduler = scheduler\\n\",\n        \"    return dataloader\\n\",\n        \"\\n\",\n        \"  def val_dataloader(self):\\n\",\n        \"    val_dataset = get_dataset(tokenizer=self.tokenizer, 
type_path=\\\"val\\\", args=self.hparams)\\n\",\n        \"    return DataLoader(val_dataset, batch_size=self.hparams.eval_batch_size, num_workers=4)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"oh1R5C-GwMqx\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"logger = logging.getLogger(__name__)\\n\",\n        \"\\n\",\n        \"class LoggingCallback(pl.Callback):\\n\",\n        \"  def on_validation_end(self, trainer, pl_module):\\n\",\n        \"    logger.info(\\\"***** Validation results *****\\\")\\n\",\n        \"    if pl_module.is_logger():\\n\",\n        \"      metrics = trainer.callback_metrics\\n\",\n        \"      # Log results\\n\",\n        \"      for key in sorted(metrics):\\n\",\n        \"        if key not in [\\\"log\\\", \\\"progress_bar\\\"]:\\n\",\n        \"          logger.info(\\\"{} = {}\\\\n\\\".format(key, str(metrics[key])))\\n\",\n        \"\\n\",\n        \"  def on_test_end(self, trainer, pl_module):\\n\",\n        \"    logger.info(\\\"***** Test results *****\\\")\\n\",\n        \"\\n\",\n        \"    if pl_module.is_logger():\\n\",\n        \"      metrics = trainer.callback_metrics\\n\",\n        \"\\n\",\n        \"      # Log and save results to file\\n\",\n        \"      output_test_results_file = os.path.join(pl_module.hparams.output_dir, \\\"test_results.txt\\\")\\n\",\n        \"      with open(output_test_results_file, \\\"w\\\") as writer:\\n\",\n        \"        for key in sorted(metrics):\\n\",\n        \"          if key not in [\\\"log\\\", \\\"progress_bar\\\"]:\\n\",\n        \"            logger.info(\\\"{} = {}\\\\n\\\".format(key, str(metrics[key])))\\n\",\n        \"            writer.write(\\\"{} = {}\\\\n\\\".format(key, str(metrics[key])))\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": 
\"markdown\",\n      \"metadata\": {\n        \"id\": \"a4hjvsBJ5Zk5\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Let's define the hyperparameters and other arguments. You can override this `dict` for a specific task as needed. While in most cases you'll only need to change the `data_dir` and `output_dir`.\\n\",\n        \"\\n\",\n        \"Here the batch size is 8 and gradient_accumulation_steps are 16 so the effective batch size is 128\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"urduopvizqTq\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"args_dict = dict(\\n\",\n        \"    data_dir=\\\"\\\", # path for data files\\n\",\n        \"    output_dir=\\\"\\\", # path to save the checkpoints\\n\",\n        \"    model_name_or_path='t5-base',\\n\",\n        \"    tokenizer_name_or_path='t5-base',\\n\",\n        \"    max_seq_length=512,\\n\",\n        \"    learning_rate=3e-4,\\n\",\n        \"    weight_decay=0.0,\\n\",\n        \"    adam_epsilon=1e-8,\\n\",\n        \"    warmup_steps=0,\\n\",\n        \"    train_batch_size=8,\\n\",\n        \"    eval_batch_size=8,\\n\",\n        \"    num_train_epochs=2,\\n\",\n        \"    gradient_accumulation_steps=16,\\n\",\n        \"    n_gpu=1,\\n\",\n        \"    early_stop_callback=False,\\n\",\n        \"    fp_16=False, # if you want to enable 16-bit training then install apex and set this to true\\n\",\n        \"    opt_level='O1', # you can find out more on optimisation levels here https://nvidia.github.io/apex/amp.html#opt-levels-and-properties\\n\",\n        \"    max_grad_norm=1.0, # if you enable 16-bit training then set this to a sensible value, 0.5 is a good default\\n\",\n        \"    seed=42,\\n\",\n        \")\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        
\"id\": \"vfhlYUUV2NIh\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"## IMDB review classification\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"b3C13iabZvwK\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"### Download IMDB Data\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"7R0QdcgXuIWW\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"!wget https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\\n\",\n        \"!tar -xvf aclImdb_v1.tar.gz\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"ni1cAK7EvXSB\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"train_pos_files = glob.glob('aclImdb/train/pos/*.txt')\\n\",\n        \"train_neg_files = glob.glob('aclImdb/train/neg/*.txt')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"jEsRn5pa0v8d\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"6977ce56-d0b4-4d9f-8548-22003bb07eaf\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"source\": [\n        \"len(train_pos_files), len(train_neg_files)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"(12500, 12500)\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 10\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        
\"id\": \"5zgS8KhlaPiA\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"We will use 2000 samples from the train set for validation. Let's choose 1000 positive reviews and 1000 negative reviews for validation and save them in the val directory\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"hLvBHcXwzXrk\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"!mkdir aclImdb/val aclImdb/val/pos aclImdb/val/neg\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"IXZmLZ1pzjiY\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"random.shuffle(train_pos_files)\\n\",\n        \"random.shuffle(train_neg_files)\\n\",\n        \"\\n\",\n        \"val_pos_files = train_pos_files[:1000]\\n\",\n        \"val_neg_files = train_neg_files[:1000]\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"5yTS2Jx40UNu\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"import shutil\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"hJnJpkdb0ZKY\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"for f in val_pos_files:\\n\",\n        \"  shutil.move(f,  'aclImdb/val/pos')\\n\",\n        \"for f in val_neg_files:\\n\",\n        \"  shutil.move(f,  'aclImdb/val/neg')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"qdEgCwL7cIyi\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        
\"### Prepare Dataset\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"McQC1FotigqA\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"f60dbf68-32cf-44e1-9a2f-f9dba38cbbac\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 186,\n          \"referenced_widgets\": [\n            \"7d8f60bfc0a248e58028b6e8a477a5f7\",\n            \"72dc1e39b931429883e68c0603797896\",\n            \"cde60c5e18f04ba792fff8c2ac33f470\",\n            \"c0c0df12695b4a1eacf8fa4ccc0ac62c\",\n            \"72ea881ce3f445a9983d858b76dd257b\",\n            \"d0f0c28a14b242f8990a547ed7f87c04\",\n            \"f97741534b554be3b5cdccd45c73b317\",\n            \"1e70a3dc7090487fa883e932bff395cb\"\n          ]\n        }\n      },\n      \"source\": [\n        \"tokenizer = T5Tokenizer.from_pretrained('t5-base')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:filelock:Lock 139780871368544 acquired on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\\n\",\n            \"INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpgy9lk1eo\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"7d8f60bfc0a248e58028b6e8a477a5f7\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, 
style=ProgressStyle(descripti…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model in cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"INFO:filelock:Lock 139780871368544 released on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\\n\",\n            \"INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"wthd9SM74RG8\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"52deb6bd-19c4-4071-8bcb-254925d8e4cc\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"source\": [\n        \"ids_neg = tokenizer.encode('negative </s>')\\n\",\n        \"ids_pos = tokenizer.encode('positive </s>')\\n\",\n       
 \"len(ids_neg), len(ids_pos)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"(2, 2)\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 21\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"k5sJkyI3a723\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"All the examples are converted in the text-2-text format as shown in the paper. However I didn't use any task prefix here. The examples are encoded as follows,\\n\",\n        \"if the review is positive then the target is 'positive' else 'negative'\\n\",\n        \"\\n\",\n        \"**input**:  I went to see this\\n\",\n        \"movie with my husband, and we both\\n\",\n        \"thought the acting was terrible!\\\"\\n\",\n        \"\\n\",\n        \"**target**: negative\\n\",\n        \"\\n\",\n        \"**input**:  Despite what others say,\\n\",\n        \"I thought this movie was funny.\\n\",\n        \"\\n\",\n        \"**target**: positive\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"VEYmYHKGcxEq\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"The dataset below takes care of reading the review files and processing the examples in text-2-text format.\\n\",\n        \"\\n\",\n        \"It cleans the review text by removing the html tags. It also appends the eos token `</s>` at the end of input and target as required by the T5 model \\n\",\n        \"\\n\",\n        \"For T5 max input length is 512 and we can choose the max length for target sequence depending upon our dataset. 
The `T5Tokenizer` encodes both 'positive' and 'negative' as single ids so I chose the max target length 2, with 1 extra for the `</s>` token\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"IIY0GenSb72m\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"class ImdbDataset(Dataset):\\n\",\n        \"  def __init__(self, tokenizer, data_dir, type_path,  max_len=512):\\n\",\n        \"    self.pos_file_path = os.path.join(data_dir, type_path, 'pos')\\n\",\n        \"    self.neg_file_path = os.path.join(data_dir, type_path, 'neg')\\n\",\n        \"    \\n\",\n        \"    self.pos_files = glob.glob(\\\"%s/*.txt\\\" % self.pos_file_path)\\n\",\n        \"    self.neg_files = glob.glob(\\\"%s/*.txt\\\" % self.neg_file_path)\\n\",\n        \"    \\n\",\n        \"    self.max_len = max_len\\n\",\n        \"    self.tokenizer = tokenizer\\n\",\n        \"    self.inputs = []\\n\",\n        \"    self.targets = []\\n\",\n        \"\\n\",\n        \"    self._build()\\n\",\n        \"  \\n\",\n        \"  def __len__(self):\\n\",\n        \"    return len(self.inputs)\\n\",\n        \"  \\n\",\n        \"  def __getitem__(self, index):\\n\",\n        \"    source_ids = self.inputs[index][\\\"input_ids\\\"].squeeze()\\n\",\n        \"    target_ids = self.targets[index][\\\"input_ids\\\"].squeeze()\\n\",\n        \"\\n\",\n        \"    src_mask    = self.inputs[index][\\\"attention_mask\\\"].squeeze()  # might need to squeeze\\n\",\n        \"    target_mask = self.targets[index][\\\"attention_mask\\\"].squeeze()  # might need to squeeze\\n\",\n        \"\\n\",\n        \"    return {\\\"source_ids\\\": source_ids, \\\"source_mask\\\": src_mask, \\\"target_ids\\\": target_ids, \\\"target_mask\\\": target_mask}\\n\",\n        \"  \\n\",\n        \"  def _build(self):\\n\",\n        \"    self._buil_examples_from_files(self.pos_files, 'positive')\\n\",\n        \"    
self._buil_examples_from_files(self.neg_files, 'negative')\\n\",\n        \"  \\n\",\n        \"  def _buil_examples_from_files(self, files, sentiment):\\n\",\n        \"    REPLACE_NO_SPACE = re.compile(\\\"[.;:!\\\\'?,\\\\\\\"()\\\\[\\\\]]\\\")\\n\",\n        \"    REPLACE_WITH_SPACE = re.compile(\\\"(<br\\\\s*/><br\\\\s*/>)|(\\\\-)|(\\\\/)\\\")\\n\",\n        \"\\n\",\n        \"    for path in files:\\n\",\n        \"      with open(path, 'r') as f:\\n\",\n        \"        text = f.read()\\n\",\n        \"      \\n\",\n        \"      line = text.strip()\\n\",\n        \"      line = REPLACE_NO_SPACE.sub(\\\"\\\", line) \\n\",\n        \"      line = REPLACE_WITH_SPACE.sub(\\\"\\\", line)\\n\",\n        \"      line = line + ' </s>'\\n\",\n        \"\\n\",\n        \"      target = sentiment + \\\" </s>\\\"\\n\",\n        \"\\n\",\n        \"       # tokenize inputs\\n\",\n        \"      tokenized_inputs = self.tokenizer.batch_encode_plus(\\n\",\n        \"          [line], max_length=self.max_len, pad_to_max_length=True, return_tensors=\\\"pt\\\"\\n\",\n        \"      )\\n\",\n        \"       # tokenize targets\\n\",\n        \"      tokenized_targets = self.tokenizer.batch_encode_plus(\\n\",\n        \"          [target], max_length=2, pad_to_max_length=True, return_tensors=\\\"pt\\\"\\n\",\n        \"      )\\n\",\n        \"\\n\",\n        \"      self.inputs.append(tokenized_inputs)\\n\",\n        \"      self.targets.append(tokenized_targets)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"gsnsKY6jemsr\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"98885b84-7f65-4d79-b470-619def772505\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"source\": [\n        \"dataset = ImdbDataset(tokenizer, 'aclImdb', 'val',  max_len=512)\\n\",\n        
\"len(dataset)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"2000\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 23\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"7g1gz05ccAzg\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"b3a263f1-8b22-46bf-9a33-f58c996d684a\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 70\n        }\n      },\n      \"source\": [\n        \"data = dataset[28]\\n\",\n        \"print(tokenizer.decode(data['source_ids']))\\n\",\n        \"print(tokenizer.decode(data['target_ids']))\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"To quote Flik that was my reaction exactly Wowyoure perfect This is the best movie I think I can even say its become my favorite movie ever even Wow I tell you what wow\\n\",\n            \"positive\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"W4cfw8bMcNdA\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"### Train\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"aTvkv4rzhPjy\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"!mkdir -p t5_imdb_sentiment\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"r5ngAP4OXFqZ\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n       
 \"args_dict.update({'data_dir': 'aclImdb', 'output_dir': 't5_imdb_sentiment', 'num_train_epochs':2})\\n\",\n        \"args = argparse.Namespace(**args_dict)\\n\",\n        \"\\n\",\n        \"checkpoint_callback = pl.callbacks.ModelCheckpoint(\\n\",\n        \"    filepath=args.output_dir, prefix=\\\"checkpoint\\\", monitor=\\\"val_loss\\\", mode=\\\"min\\\", save_top_k=5\\n\",\n        \")\\n\",\n        \"\\n\",\n        \"train_params = dict(\\n\",\n        \"    accumulate_grad_batches=args.gradient_accumulation_steps,\\n\",\n        \"    gpus=args.n_gpu,\\n\",\n        \"    max_epochs=args.num_train_epochs,\\n\",\n        \"    early_stop_callback=False,\\n\",\n        \"    precision= 16 if args.fp_16 else 32,\\n\",\n        \"    amp_level=args.opt_level,\\n\",\n        \"    gradient_clip_val=args.max_grad_norm,\\n\",\n        \"    checkpoint_callback=checkpoint_callback,\\n\",\n        \"    callbacks=[LoggingCallback()],\\n\",\n        \")\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"RJt_VqzEAMUg\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Define the `get_dataset` function to return the dataset. The model calls this function to get the train and val datasets. We are defining a dataset function so that we won't need to modify the model code at all. Redefine the function to return different dataset according to the problem. 
While this is not the best solution for now this works \"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"2h2aGPgp0vOf\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"def get_dataset(tokenizer, type_path, args):\\n\",\n        \"  return ImdbDataset(tokenizer=tokenizer, data_dir=args.data_dir, type_path=type_path,  max_len=args.max_seq_length)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"4IOQpawZA9XC\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"**Initialize model**\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"kJsz3a4SilAF\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"d711c5a7-4c7d-4392-8cf5-3df1cbcf2859\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 1000,\n          \"referenced_widgets\": [\n            \"f414bac332054c7f86af89b8e50c7d73\",\n            \"1d9c52a1bb8843b6b0f151571cbf30a4\",\n            \"ed039b8125714030b03912fb29a93ca4\",\n            \"d9b445b8b3b04569adf22429259b4954\",\n            \"6c61b3c76d7045eb825172ba51b3fa63\",\n            \"d11ffd1efc024c1ca86276430d29fd1e\",\n            \"22fac35d924f464ca0b33be21a566a86\",\n            \"cfe128b0d2c648c18d2255b3f8506a09\",\n            \"c34ac6d2548249819c1eab28956edec4\",\n            \"de2c77b3fb0f4dba99f92062b2db5328\",\n            \"6ea23f0979824aac935f3f1ad10a86cd\",\n            \"6452bc3b5ad445a8a5e272207fe4504d\",\n            \"d6ef508766c54f8993d1d1f3d7cac040\",\n            \"1b69bbddeb244defab9e21690a45c79e\",\n            \"4a2b56fd6780470ab1574509fa432183\",\n            \"3853231cd966465882a93fad9c5dc428\"\n          ]\n        }\n      },\n      \"source\": [\n        \"model = T5FineTuner(args)\"\n      
],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:filelock:Lock 139780702227256 acquired on /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b.lock\\n\",\n            \"INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp5_6vo8c2\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"f414bac332054c7f86af89b8e50c7d73\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1199.0, style=ProgressStyle(description…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json in cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"INFO:filelock:Lock 139780702227256 released on 
/root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b.lock\\n\",\n            \"INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"INFO:transformers.configuration_utils:Model config T5Config {\\n\",\n            \"  \\\"architectures\\\": [\\n\",\n            \"    \\\"T5WithLMHeadModel\\\"\\n\",\n            \"  ],\\n\",\n            \"  \\\"d_ff\\\": 3072,\\n\",\n            \"  \\\"d_kv\\\": 64,\\n\",\n            \"  \\\"d_model\\\": 768,\\n\",\n            \"  \\\"decoder_start_token_id\\\": 0,\\n\",\n            \"  \\\"dropout_rate\\\": 0.1,\\n\",\n            \"  \\\"eos_token_id\\\": 1,\\n\",\n            \"  \\\"initializer_factor\\\": 1.0,\\n\",\n            \"  \\\"is_encoder_decoder\\\": true,\\n\",\n            \"  \\\"layer_norm_epsilon\\\": 1e-06,\\n\",\n            \"  \\\"model_type\\\": \\\"t5\\\",\\n\",\n            \"  \\\"n_positions\\\": 512,\\n\",\n            \"  \\\"num_heads\\\": 12,\\n\",\n            \"  \\\"num_layers\\\": 12,\\n\",\n            \"  \\\"output_past\\\": true,\\n\",\n            \"  \\\"pad_token_id\\\": 0,\\n\",\n            \"  \\\"relative_attention_num_buckets\\\": 32,\\n\",\n            \"  \\\"task_specific_params\\\": {\\n\",\n            \"    \\\"summarization\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"length_penalty\\\": 2.0,\\n\",\n            \"      \\\"max_length\\\": 200,\\n\",\n            \"      \\\"min_length\\\": 30,\\n\",\n            \"      \\\"no_repeat_ngram_size\\\": 3,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": 
\\\"summarize: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_de\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to German: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_fr\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to French: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_ro\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to Romanian: \\\"\\n\",\n            \"    }\\n\",\n            \"  },\\n\",\n            \"  \\\"vocab_size\\\": 32128\\n\",\n            \"}\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:filelock:Lock 139780702189776 acquired on /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa.lock\\n\",\n            \"INFO:transformers.file_utils:https://cdn.huggingface.co/t5-base-pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmps92w5ati\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n       
   \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"c34ac6d2548249819c1eab28956edec4\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=891691430.0, style=ProgressStyle(descri…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:transformers.file_utils:storing https://cdn.huggingface.co/t5-base-pytorch_model.bin in cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"INFO:filelock:Lock 139780702189776 released on /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa.lock\\n\",\n            \"INFO:transformers.modeling_utils:loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:transformers.modeling_utils:Weights of 
T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\\n\",\n            \"INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"RSJytKv1BFyc\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"**Initialize trainer**\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"PxO8OTA3irbw\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"6ebd7f3f-09fe-4363-9869-24d39183d2ff\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 50\n        }\n      },\n      \"source\": [\n        \"trainer = pl.Trainer(**train_params)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:lightning:GPU available: True, used: True\\n\",\n            \"INFO:lightning:CUDA_VISIBLE_DEVICES: [0]\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"Wo7cSSvFGEhe\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"**start fine-tuning**\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"hVGd6imfizLP\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"cca18a5f-7900-4f58-ed74-6684b72a54e1\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          
\"height\": 1000,\n          \"referenced_widgets\": [\n            \"915a0b65612243668570c555a47a6c37\",\n            \"c85b348624504af294b78de744969493\",\n            \"d56a6918840e4f6588af5da5f8f54015\",\n            \"41db48cf488a4522b1f04b33c2261262\",\n            \"8c2d9ac8c22f486299949f4cbed16437\",\n            \"222974dba69145e7b171360bec239ba5\",\n            \"9e95200811bb497ab0ac0229f5e0ddaa\",\n            \"3773b14f23974ad3a5bbb7ff947e68ca\",\n            \"3ec26f803d124dd0877e1ce0e3517f68\",\n            \"aabb0b2f2ae64684a80f1ea39c9a7d1b\",\n            \"885696e0606c4353a5d21feec03aebc7\",\n            \"659dd7302f3a40038834c4f1d8e59250\",\n            \"6f3859c80aa945e4b4ae2aa957755b7c\",\n            \"a840a738d20b4f43baf18453db53fdf0\",\n            \"f7139c4e04374ffbafe6a849500c6369\",\n            \"ef8f0b7c9b0c4f829e3ad59e83cbdd67\",\n            \"dbe7a4854b8f420faaea8de4583fb1f0\",\n            \"4d1f674483d44e559ae1de553dd1d726\",\n            \"ce506c0137914e4db93b9db35154c62a\",\n            \"e92a181ff64d4e0290236a91cbdb8d67\",\n            \"e8f7179c238e4d2d91d456b2c07e1b3e\",\n            \"e67100d71b5047158ab48ef0fd36cb99\",\n            \"17f7e321de81404dabaa3e84fadce2cf\",\n            \"a15e2fcc467242cb9fad5b2082a70c39\",\n            \"f40c9bf16c9a473ba758a6439dce2652\",\n            \"8d17a251bf1440d4aa8513ad5f15ba1d\",\n            \"165319529b364183ae344a9a14f5bc52\",\n            \"3d0c08f3abbe421d83f2b35583221291\",\n            \"6e851577f682494c894b9afdd07b1201\",\n            \"e67e9e945a9c430f9844946cd81aae3a\",\n            \"34fbc6e29df046faaedd9fe3230559cb\",\n            \"bbbdd81a2e8f4d68b33d698f45ccc9ae\"\n          ]\n        }\n      },\n      \"source\": [\n        \"trainer.fit(model)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:lightning:\\n\",\n            \"    | Name                            
                                      | Type                       | Params\\n\",\n            \"-----------------------------------------------------------------------------------------------------------------\\n\",\n            \"0   | model                                                                 | T5ForConditionalGeneration | 222 M \\n\",\n            \"1   | model.shared                                                          | Embedding                  | 24 M  \\n\",\n            \"2   | model.encoder                                                         | T5Stack                    | 109 M \\n\",\n            \"3   | model.encoder.block                                                   | ModuleList                 | 84 M  \\n\",\n            \"4   | model.encoder.block.0                                                 | T5Block                    | 7 M   \\n\",\n            \"5   | model.encoder.block.0.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"6   | model.encoder.block.0.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"7   | model.encoder.block.0.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"8   | model.encoder.block.0.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"9   | model.encoder.block.0.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"10  | model.encoder.block.0.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"11  | model.encoder.block.0.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"12  | model.encoder.block.0.layer.0.SelfAttention.relative_attention_bias   | Embedding                  | 384   \\n\",\n            \"13  | 
model.encoder.block.0.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"14  | model.encoder.block.0.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"15  | model.encoder.block.0.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"16  | model.encoder.block.0.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"17  | model.encoder.block.0.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"18  | model.encoder.block.0.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"19  | model.encoder.block.0.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"20  | model.encoder.block.0.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"21  | model.encoder.block.0.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"22  | model.encoder.block.1                                                 | T5Block                    | 7 M   \\n\",\n            \"23  | model.encoder.block.1.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"24  | model.encoder.block.1.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"25  | model.encoder.block.1.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"26  | model.encoder.block.1.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"27  | model.encoder.block.1.layer.0.SelfAttention.k                         | Linear                     | 589 K 
\\n\",\n            \"28  | model.encoder.block.1.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"29  | model.encoder.block.1.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"30  | model.encoder.block.1.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"31  | model.encoder.block.1.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"32  | model.encoder.block.1.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"33  | model.encoder.block.1.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"34  | model.encoder.block.1.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"35  | model.encoder.block.1.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"36  | model.encoder.block.1.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"37  | model.encoder.block.1.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"38  | model.encoder.block.1.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"39  | model.encoder.block.2                                                 | T5Block                    | 7 M   \\n\",\n            \"40  | model.encoder.block.2.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"41  | model.encoder.block.2.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"42  | model.encoder.block.2.layer.0.SelfAttention                           | 
T5Attention                | 2 M   \\n\",\n            \"43  | model.encoder.block.2.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"44  | model.encoder.block.2.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"45  | model.encoder.block.2.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"46  | model.encoder.block.2.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"47  | model.encoder.block.2.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"48  | model.encoder.block.2.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"49  | model.encoder.block.2.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"50  | model.encoder.block.2.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"51  | model.encoder.block.2.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"52  | model.encoder.block.2.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"53  | model.encoder.block.2.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"54  | model.encoder.block.2.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"55  | model.encoder.block.2.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"56  | model.encoder.block.3                                                 | T5Block                    | 7 M   \\n\",\n            \"57  | model.encoder.block.3.layer                    
                       | ModuleList                 | 7 M   \\n\",\n            \"58  | model.encoder.block.3.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"59  | model.encoder.block.3.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"60  | model.encoder.block.3.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"61  | model.encoder.block.3.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"62  | model.encoder.block.3.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"63  | model.encoder.block.3.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"64  | model.encoder.block.3.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"65  | model.encoder.block.3.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"66  | model.encoder.block.3.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"67  | model.encoder.block.3.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"68  | model.encoder.block.3.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"69  | model.encoder.block.3.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"70  | model.encoder.block.3.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"71  | model.encoder.block.3.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"72  | 
model.encoder.block.3.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"73  | model.encoder.block.4                                                 | T5Block                    | 7 M   \\n\",\n            \"74  | model.encoder.block.4.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"75  | model.encoder.block.4.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"76  | model.encoder.block.4.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"77  | model.encoder.block.4.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"78  | model.encoder.block.4.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"79  | model.encoder.block.4.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"80  | model.encoder.block.4.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"81  | model.encoder.block.4.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"82  | model.encoder.block.4.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"83  | model.encoder.block.4.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"84  | model.encoder.block.4.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"85  | model.encoder.block.4.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"86  | model.encoder.block.4.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   
\\n\",\n            \"87  | model.encoder.block.4.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"88  | model.encoder.block.4.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"89  | model.encoder.block.4.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"90  | model.encoder.block.5                                                 | T5Block                    | 7 M   \\n\",\n            \"91  | model.encoder.block.5.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"92  | model.encoder.block.5.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"93  | model.encoder.block.5.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"94  | model.encoder.block.5.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"95  | model.encoder.block.5.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"96  | model.encoder.block.5.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"97  | model.encoder.block.5.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"98  | model.encoder.block.5.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"99  | model.encoder.block.5.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"100 | model.encoder.block.5.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"101 | model.encoder.block.5.layer.1.DenseReluDense                          | 
T5DenseReluDense           | 4 M   \\n\",\n            \"102 | model.encoder.block.5.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"103 | model.encoder.block.5.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"104 | model.encoder.block.5.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"105 | model.encoder.block.5.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"106 | model.encoder.block.5.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"107 | model.encoder.block.6                                                 | T5Block                    | 7 M   \\n\",\n            \"108 | model.encoder.block.6.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"109 | model.encoder.block.6.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"110 | model.encoder.block.6.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"111 | model.encoder.block.6.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"112 | model.encoder.block.6.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"113 | model.encoder.block.6.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"114 | model.encoder.block.6.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"115 | model.encoder.block.6.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"116 | model.encoder.block.6.layer.0.dropout          
                       | Dropout                    | 0     \\n\",\n            \"117 | model.encoder.block.6.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"118 | model.encoder.block.6.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"119 | model.encoder.block.6.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"120 | model.encoder.block.6.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"121 | model.encoder.block.6.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"122 | model.encoder.block.6.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"123 | model.encoder.block.6.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"124 | model.encoder.block.7                                                 | T5Block                    | 7 M   \\n\",\n            \"125 | model.encoder.block.7.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"126 | model.encoder.block.7.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"127 | model.encoder.block.7.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"128 | model.encoder.block.7.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"129 | model.encoder.block.7.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"130 | model.encoder.block.7.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"131 | 
model.encoder.block.7.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"132 | model.encoder.block.7.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"133 | model.encoder.block.7.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"134 | model.encoder.block.7.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"135 | model.encoder.block.7.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"136 | model.encoder.block.7.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"137 | model.encoder.block.7.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"138 | model.encoder.block.7.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"139 | model.encoder.block.7.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"140 | model.encoder.block.7.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"141 | model.encoder.block.8                                                 | T5Block                    | 7 M   \\n\",\n            \"142 | model.encoder.block.8.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"143 | model.encoder.block.8.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"144 | model.encoder.block.8.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"145 | model.encoder.block.8.layer.0.SelfAttention.q                         | Linear                     | 589 K 
\\n\",\n            \"146 | model.encoder.block.8.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"147 | model.encoder.block.8.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"148 | model.encoder.block.8.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"149 | model.encoder.block.8.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"150 | model.encoder.block.8.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"151 | model.encoder.block.8.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"152 | model.encoder.block.8.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"153 | model.encoder.block.8.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"154 | model.encoder.block.8.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"155 | model.encoder.block.8.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"156 | model.encoder.block.8.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"157 | model.encoder.block.8.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"158 | model.encoder.block.9                                                 | T5Block                    | 7 M   \\n\",\n            \"159 | model.encoder.block.9.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"160 | model.encoder.block.9.layer.0                                         | 
T5LayerSelfAttention       | 2 M   \\n\",\n            \"161 | model.encoder.block.9.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"162 | model.encoder.block.9.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"163 | model.encoder.block.9.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"164 | model.encoder.block.9.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"165 | model.encoder.block.9.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"166 | model.encoder.block.9.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"167 | model.encoder.block.9.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"168 | model.encoder.block.9.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"169 | model.encoder.block.9.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"170 | model.encoder.block.9.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"171 | model.encoder.block.9.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"172 | model.encoder.block.9.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"173 | model.encoder.block.9.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"174 | model.encoder.block.9.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"175 | model.encoder.block.10                         
                       | T5Block                    | 7 M   \\n\",\n            \"176 | model.encoder.block.10.layer                                          | ModuleList                 | 7 M   \\n\",\n            \"177 | model.encoder.block.10.layer.0                                        | T5LayerSelfAttention       | 2 M   \\n\",\n            \"178 | model.encoder.block.10.layer.0.SelfAttention                          | T5Attention                | 2 M   \\n\",\n            \"179 | model.encoder.block.10.layer.0.SelfAttention.q                        | Linear                     | 589 K \\n\",\n            \"180 | model.encoder.block.10.layer.0.SelfAttention.k                        | Linear                     | 589 K \\n\",\n            \"181 | model.encoder.block.10.layer.0.SelfAttention.v                        | Linear                     | 589 K \\n\",\n            \"182 | model.encoder.block.10.layer.0.SelfAttention.o                        | Linear                     | 589 K \\n\",\n            \"183 | model.encoder.block.10.layer.0.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"184 | model.encoder.block.10.layer.0.dropout                                | Dropout                    | 0     \\n\",\n            \"185 | model.encoder.block.10.layer.1                                        | T5LayerFF                  | 4 M   \\n\",\n            \"186 | model.encoder.block.10.layer.1.DenseReluDense                         | T5DenseReluDense           | 4 M   \\n\",\n            \"187 | model.encoder.block.10.layer.1.DenseReluDense.wi                      | Linear                     | 2 M   \\n\",\n            \"188 | model.encoder.block.10.layer.1.DenseReluDense.wo                      | Linear                     | 2 M   \\n\",\n            \"189 | model.encoder.block.10.layer.1.DenseReluDense.dropout                 | Dropout                    | 0     \\n\",\n            \"190 | 
model.encoder.block.10.layer.1.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"191 | model.encoder.block.10.layer.1.dropout                                | Dropout                    | 0     \\n\",\n            \"192 | model.encoder.block.11                                                | T5Block                    | 7 M   \\n\",\n            \"193 | model.encoder.block.11.layer                                          | ModuleList                 | 7 M   \\n\",\n            \"194 | model.encoder.block.11.layer.0                                        | T5LayerSelfAttention       | 2 M   \\n\",\n            \"195 | model.encoder.block.11.layer.0.SelfAttention                          | T5Attention                | 2 M   \\n\",\n            \"196 | model.encoder.block.11.layer.0.SelfAttention.q                        | Linear                     | 589 K \\n\",\n            \"197 | model.encoder.block.11.layer.0.SelfAttention.k                        | Linear                     | 589 K \\n\",\n            \"198 | model.encoder.block.11.layer.0.SelfAttention.v                        | Linear                     | 589 K \\n\",\n            \"199 | model.encoder.block.11.layer.0.SelfAttention.o                        | Linear                     | 589 K \\n\",\n            \"200 | model.encoder.block.11.layer.0.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"201 | model.encoder.block.11.layer.0.dropout                                | Dropout                    | 0     \\n\",\n            \"202 | model.encoder.block.11.layer.1                                        | T5LayerFF                  | 4 M   \\n\",\n            \"203 | model.encoder.block.11.layer.1.DenseReluDense                         | T5DenseReluDense           | 4 M   \\n\",\n            \"204 | model.encoder.block.11.layer.1.DenseReluDense.wi                      | Linear                     | 2 M   
\\n\",\n            \"205 | model.encoder.block.11.layer.1.DenseReluDense.wo                      | Linear                     | 2 M   \\n\",\n            \"206 | model.encoder.block.11.layer.1.DenseReluDense.dropout                 | Dropout                    | 0     \\n\",\n            \"207 | model.encoder.block.11.layer.1.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"208 | model.encoder.block.11.layer.1.dropout                                | Dropout                    | 0     \\n\",\n            \"209 | model.encoder.final_layer_norm                                        | T5LayerNorm                | 768   \\n\",\n            \"210 | model.encoder.dropout                                                 | Dropout                    | 0     \\n\",\n            \"211 | model.decoder                                                         | T5Stack                    | 137 M \\n\",\n            \"212 | model.decoder.block                                                   | ModuleList                 | 113 M \\n\",\n            \"213 | model.decoder.block.0                                                 | T5Block                    | 9 M   \\n\",\n            \"214 | model.decoder.block.0.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"215 | model.decoder.block.0.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"216 | model.decoder.block.0.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"217 | model.decoder.block.0.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"218 | model.decoder.block.0.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"219 | model.decoder.block.0.layer.0.SelfAttention.v                         | Linear    
                 | 589 K \\n\",\n            \"220 | model.decoder.block.0.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"221 | model.decoder.block.0.layer.0.SelfAttention.relative_attention_bias   | Embedding                  | 384   \\n\",\n            \"222 | model.decoder.block.0.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"223 | model.decoder.block.0.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"224 | model.decoder.block.0.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"225 | model.decoder.block.0.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"226 | model.decoder.block.0.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"227 | model.decoder.block.0.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"228 | model.decoder.block.0.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"229 | model.decoder.block.0.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"230 | model.decoder.block.0.layer.1.EncDecAttention.relative_attention_bias | Embedding                  | 384   \\n\",\n            \"231 | model.decoder.block.0.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"232 | model.decoder.block.0.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"233 | model.decoder.block.0.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"234 | model.decoder.block.0.layer.2.DenseReluDense             
             | T5DenseReluDense           | 4 M   \\n\",\n            \"235 | model.decoder.block.0.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"236 | model.decoder.block.0.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"237 | model.decoder.block.0.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"238 | model.decoder.block.0.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"239 | model.decoder.block.0.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"240 | model.decoder.block.1                                                 | T5Block                    | 9 M   \\n\",\n            \"241 | model.decoder.block.1.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"242 | model.decoder.block.1.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"243 | model.decoder.block.1.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"244 | model.decoder.block.1.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"245 | model.decoder.block.1.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"246 | model.decoder.block.1.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"247 | model.decoder.block.1.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"248 | model.decoder.block.1.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"249 | 
model.decoder.block.1.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"250 | model.decoder.block.1.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"251 | model.decoder.block.1.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"252 | model.decoder.block.1.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"253 | model.decoder.block.1.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"254 | model.decoder.block.1.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"255 | model.decoder.block.1.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"256 | model.decoder.block.1.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"257 | model.decoder.block.1.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"258 | model.decoder.block.1.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"259 | model.decoder.block.1.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"260 | model.decoder.block.1.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"261 | model.decoder.block.1.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"262 | model.decoder.block.1.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"263 | model.decoder.block.1.layer.2.layer_norm                              | T5LayerNorm                | 768   
\\n\",\n            \"264 | model.decoder.block.1.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"265 | model.decoder.block.2                                                 | T5Block                    | 9 M   \\n\",\n            \"266 | model.decoder.block.2.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"267 | model.decoder.block.2.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"268 | model.decoder.block.2.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"269 | model.decoder.block.2.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"270 | model.decoder.block.2.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"271 | model.decoder.block.2.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"272 | model.decoder.block.2.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"273 | model.decoder.block.2.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"274 | model.decoder.block.2.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"275 | model.decoder.block.2.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"276 | model.decoder.block.2.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"277 | model.decoder.block.2.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"278 | model.decoder.block.2.layer.1.EncDecAttention.k                       | Linear    
                 | 589 K \\n\",\n            \"279 | model.decoder.block.2.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"280 | model.decoder.block.2.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"281 | model.decoder.block.2.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"282 | model.decoder.block.2.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"283 | model.decoder.block.2.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"284 | model.decoder.block.2.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"285 | model.decoder.block.2.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"286 | model.decoder.block.2.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"287 | model.decoder.block.2.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"288 | model.decoder.block.2.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"289 | model.decoder.block.2.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"290 | model.decoder.block.3                                                 | T5Block                    | 9 M   \\n\",\n            \"291 | model.decoder.block.3.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"292 | model.decoder.block.3.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"293 | model.decoder.block.3.layer.0.SelfAttention              
             | T5Attention                | 2 M   \\n\",\n            \"294 | model.decoder.block.3.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"295 | model.decoder.block.3.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"296 | model.decoder.block.3.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"297 | model.decoder.block.3.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"298 | model.decoder.block.3.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"299 | model.decoder.block.3.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"300 | model.decoder.block.3.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"301 | model.decoder.block.3.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"302 | model.decoder.block.3.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"303 | model.decoder.block.3.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"304 | model.decoder.block.3.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"305 | model.decoder.block.3.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"306 | model.decoder.block.3.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"307 | model.decoder.block.3.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"308 | model.decoder.block.3.layer.2   
                                      | T5LayerFF                  | 4 M   \\n\",\n            \"309 | model.decoder.block.3.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"310 | model.decoder.block.3.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"311 | model.decoder.block.3.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"312 | model.decoder.block.3.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"313 | model.decoder.block.3.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"314 | model.decoder.block.3.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"315 | model.decoder.block.4                                                 | T5Block                    | 9 M   \\n\",\n            \"316 | model.decoder.block.4.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"317 | model.decoder.block.4.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"318 | model.decoder.block.4.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"319 | model.decoder.block.4.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"320 | model.decoder.block.4.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"321 | model.decoder.block.4.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"322 | model.decoder.block.4.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"323 | 
model.decoder.block.4.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"324 | model.decoder.block.4.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"325 | model.decoder.block.4.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"326 | model.decoder.block.4.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"327 | model.decoder.block.4.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"328 | model.decoder.block.4.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"329 | model.decoder.block.4.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"330 | model.decoder.block.4.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"331 | model.decoder.block.4.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"332 | model.decoder.block.4.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"333 | model.decoder.block.4.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"334 | model.decoder.block.4.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"335 | model.decoder.block.4.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"336 | model.decoder.block.4.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"337 | model.decoder.block.4.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     
\\n\",\n            \"338 | model.decoder.block.4.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"339 | model.decoder.block.4.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"340 | model.decoder.block.5                                                 | T5Block                    | 9 M   \\n\",\n            \"341 | model.decoder.block.5.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"342 | model.decoder.block.5.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"343 | model.decoder.block.5.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"344 | model.decoder.block.5.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"345 | model.decoder.block.5.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"346 | model.decoder.block.5.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"347 | model.decoder.block.5.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"348 | model.decoder.block.5.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"349 | model.decoder.block.5.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"350 | model.decoder.block.5.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"351 | model.decoder.block.5.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"352 | model.decoder.block.5.layer.1.EncDecAttention.q                       | Linear    
                 | 589 K \\n\",\n            \"353 | model.decoder.block.5.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"354 | model.decoder.block.5.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"355 | model.decoder.block.5.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"356 | model.decoder.block.5.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"357 | model.decoder.block.5.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"358 | model.decoder.block.5.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"359 | model.decoder.block.5.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"360 | model.decoder.block.5.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"361 | model.decoder.block.5.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"362 | model.decoder.block.5.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"363 | model.decoder.block.5.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"364 | model.decoder.block.5.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"365 | model.decoder.block.6                                                 | T5Block                    | 9 M   \\n\",\n            \"366 | model.decoder.block.6.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"367 | model.decoder.block.6.layer.0                            
             | T5LayerSelfAttention       | 2 M   \\n\",\n            \"368 | model.decoder.block.6.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"369 | model.decoder.block.6.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"370 | model.decoder.block.6.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"371 | model.decoder.block.6.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"372 | model.decoder.block.6.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"373 | model.decoder.block.6.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"374 | model.decoder.block.6.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"375 | model.decoder.block.6.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"376 | model.decoder.block.6.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"377 | model.decoder.block.6.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"378 | model.decoder.block.6.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"379 | model.decoder.block.6.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"380 | model.decoder.block.6.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"381 | model.decoder.block.6.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"382 | 
model.decoder.block.6.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"383 | model.decoder.block.6.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"384 | model.decoder.block.6.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"385 | model.decoder.block.6.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"386 | model.decoder.block.6.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"387 | model.decoder.block.6.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"388 | model.decoder.block.6.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"389 | model.decoder.block.6.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"390 | model.decoder.block.7                                                 | T5Block                    | 9 M   \\n\",\n            \"391 | model.decoder.block.7.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"392 | model.decoder.block.7.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"393 | model.decoder.block.7.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"394 | model.decoder.block.7.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"395 | model.decoder.block.7.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"396 | model.decoder.block.7.layer.0.SelfAttention.v                         | Linear                     | 589 K 
\\n\",\n            \"397 | model.decoder.block.7.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"398 | model.decoder.block.7.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"399 | model.decoder.block.7.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"400 | model.decoder.block.7.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"401 | model.decoder.block.7.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"402 | model.decoder.block.7.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"403 | model.decoder.block.7.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"404 | model.decoder.block.7.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"405 | model.decoder.block.7.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"406 | model.decoder.block.7.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"407 | model.decoder.block.7.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"408 | model.decoder.block.7.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"409 | model.decoder.block.7.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"410 | model.decoder.block.7.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"411 | model.decoder.block.7.layer.2.DenseReluDense.wo                       | Linear    
                 | 2 M   \\n\",\n            \"412 | model.decoder.block.7.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"413 | model.decoder.block.7.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"414 | model.decoder.block.7.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"415 | model.decoder.block.8                                                 | T5Block                    | 9 M   \\n\",\n            \"416 | model.decoder.block.8.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"417 | model.decoder.block.8.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"418 | model.decoder.block.8.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"419 | model.decoder.block.8.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"420 | model.decoder.block.8.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"421 | model.decoder.block.8.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"422 | model.decoder.block.8.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"423 | model.decoder.block.8.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"424 | model.decoder.block.8.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"425 | model.decoder.block.8.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"426 | model.decoder.block.8.layer.1.EncDecAttention            
             | T5Attention                | 2 M   \\n\",\n            \"427 | model.decoder.block.8.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"428 | model.decoder.block.8.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"429 | model.decoder.block.8.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"430 | model.decoder.block.8.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"431 | model.decoder.block.8.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"432 | model.decoder.block.8.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"433 | model.decoder.block.8.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"434 | model.decoder.block.8.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"435 | model.decoder.block.8.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"436 | model.decoder.block.8.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"437 | model.decoder.block.8.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"438 | model.decoder.block.8.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"439 | model.decoder.block.8.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"440 | model.decoder.block.9                                                 | T5Block                    | 9 M   \\n\",\n            \"441 | model.decoder.block.9.layer     
                                      | ModuleList                 | 9 M   \\n\",\n            \"442 | model.decoder.block.9.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"443 | model.decoder.block.9.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"444 | model.decoder.block.9.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"445 | model.decoder.block.9.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"446 | model.decoder.block.9.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"447 | model.decoder.block.9.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"448 | model.decoder.block.9.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"449 | model.decoder.block.9.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"450 | model.decoder.block.9.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"451 | model.decoder.block.9.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"452 | model.decoder.block.9.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"453 | model.decoder.block.9.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"454 | model.decoder.block.9.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"455 | model.decoder.block.9.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"456 | 
model.decoder.block.9.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"457 | model.decoder.block.9.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"458 | model.decoder.block.9.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"459 | model.decoder.block.9.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"460 | model.decoder.block.9.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"461 | model.decoder.block.9.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"462 | model.decoder.block.9.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"463 | model.decoder.block.9.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"464 | model.decoder.block.9.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"465 | model.decoder.block.10                                                | T5Block                    | 9 M   \\n\",\n            \"466 | model.decoder.block.10.layer                                          | ModuleList                 | 9 M   \\n\",\n            \"467 | model.decoder.block.10.layer.0                                        | T5LayerSelfAttention       | 2 M   \\n\",\n            \"468 | model.decoder.block.10.layer.0.SelfAttention                          | T5Attention                | 2 M   \\n\",\n            \"469 | model.decoder.block.10.layer.0.SelfAttention.q                        | Linear                     | 589 K \\n\",\n            \"470 | model.decoder.block.10.layer.0.SelfAttention.k                        | Linear                     | 589 K 
\\n\",\n            \"471 | model.decoder.block.10.layer.0.SelfAttention.v                        | Linear                     | 589 K \\n\",\n            \"472 | model.decoder.block.10.layer.0.SelfAttention.o                        | Linear                     | 589 K \\n\",\n            \"473 | model.decoder.block.10.layer.0.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"474 | model.decoder.block.10.layer.0.dropout                                | Dropout                    | 0     \\n\",\n            \"475 | model.decoder.block.10.layer.1                                        | T5LayerCrossAttention      | 2 M   \\n\",\n            \"476 | model.decoder.block.10.layer.1.EncDecAttention                        | T5Attention                | 2 M   \\n\",\n            \"477 | model.decoder.block.10.layer.1.EncDecAttention.q                      | Linear                     | 589 K \\n\",\n            \"478 | model.decoder.block.10.layer.1.EncDecAttention.k                      | Linear                     | 589 K \\n\",\n            \"479 | model.decoder.block.10.layer.1.EncDecAttention.v                      | Linear                     | 589 K \\n\",\n            \"480 | model.decoder.block.10.layer.1.EncDecAttention.o                      | Linear                     | 589 K \\n\",\n            \"481 | model.decoder.block.10.layer.1.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"482 | model.decoder.block.10.layer.1.dropout                                | Dropout                    | 0     \\n\",\n            \"483 | model.decoder.block.10.layer.2                                        | T5LayerFF                  | 4 M   \\n\",\n            \"484 | model.decoder.block.10.layer.2.DenseReluDense                         | T5DenseReluDense           | 4 M   \\n\",\n            \"485 | model.decoder.block.10.layer.2.DenseReluDense.wi                      | Linear    
                 | 2 M   \\n\",\n            \"486 | model.decoder.block.10.layer.2.DenseReluDense.wo                      | Linear                     | 2 M   \\n\",\n            \"487 | model.decoder.block.10.layer.2.DenseReluDense.dropout                 | Dropout                    | 0     \\n\",\n            \"488 | model.decoder.block.10.layer.2.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"489 | model.decoder.block.10.layer.2.dropout                                | Dropout                    | 0     \\n\",\n            \"490 | model.decoder.block.11                                                | T5Block                    | 9 M   \\n\",\n            \"491 | model.decoder.block.11.layer                                          | ModuleList                 | 9 M   \\n\",\n            \"492 | model.decoder.block.11.layer.0                                        | T5LayerSelfAttention       | 2 M   \\n\",\n            \"493 | model.decoder.block.11.layer.0.SelfAttention                          | T5Attention                | 2 M   \\n\",\n            \"494 | model.decoder.block.11.layer.0.SelfAttention.q                        | Linear                     | 589 K \\n\",\n            \"495 | model.decoder.block.11.layer.0.SelfAttention.k                        | Linear                     | 589 K \\n\",\n            \"496 | model.decoder.block.11.layer.0.SelfAttention.v                        | Linear                     | 589 K \\n\",\n            \"497 | model.decoder.block.11.layer.0.SelfAttention.o                        | Linear                     | 589 K \\n\",\n            \"498 | model.decoder.block.11.layer.0.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"499 | model.decoder.block.11.layer.0.dropout                                | Dropout                    | 0     \\n\",\n            \"500 | model.decoder.block.11.layer.1                           
             | T5LayerCrossAttention      | 2 M   \\n\",\n            \"501 | model.decoder.block.11.layer.1.EncDecAttention                        | T5Attention                | 2 M   \\n\",\n            \"502 | model.decoder.block.11.layer.1.EncDecAttention.q                      | Linear                     | 589 K \\n\",\n            \"503 | model.decoder.block.11.layer.1.EncDecAttention.k                      | Linear                     | 589 K \\n\",\n            \"504 | model.decoder.block.11.layer.1.EncDecAttention.v                      | Linear                     | 589 K \\n\",\n            \"505 | model.decoder.block.11.layer.1.EncDecAttention.o                      | Linear                     | 589 K \\n\",\n            \"506 | model.decoder.block.11.layer.1.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"507 | model.decoder.block.11.layer.1.dropout                                | Dropout                    | 0     \\n\",\n            \"508 | model.decoder.block.11.layer.2                                        | T5LayerFF                  | 4 M   \\n\",\n            \"509 | model.decoder.block.11.layer.2.DenseReluDense                         | T5DenseReluDense           | 4 M   \\n\",\n            \"510 | model.decoder.block.11.layer.2.DenseReluDense.wi                      | Linear                     | 2 M   \\n\",\n            \"511 | model.decoder.block.11.layer.2.DenseReluDense.wo                      | Linear                     | 2 M   \\n\",\n            \"512 | model.decoder.block.11.layer.2.DenseReluDense.dropout                 | Dropout                    | 0     \\n\",\n            \"513 | model.decoder.block.11.layer.2.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"514 | model.decoder.block.11.layer.2.dropout                                | Dropout                    | 0     \\n\",\n            \"515 | model.decoder.final_layer_norm  
                                      | T5LayerNorm                | 768   \\n\",\n            \"516 | model.decoder.dropout                                                 | Dropout                    | 0     \\n\",\n            \"517 | model.lm_head                                                         | Linear                     | 24 M  \\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"915a0b65612243668570c555a47a6c37\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\r\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"3ec26f803d124dd0877e1ce0e3517f68\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"/pytorch/torch/csrc/utils/python_arg_parser.cpp:756: UserWarning: This overload of add_ is deprecated:\\n\",\n            \"\\tadd_(Number alpha, Tensor other)\\n\",\n            \"Consider using one of the following 
signatures instead:\\n\",\n            \"\\tadd_(Tensor other, *, Number alpha)\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"dbe7a4854b8f420faaea8de4583fb1f0\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:__main__:***** Validation results *****\\n\",\n            \"INFO:__main__:avg_val_loss = tensor(0.0839, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:loss = tensor(0.0199, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:train_loss = tensor(0.0199, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:val_loss = tensor(0.0839, device='cuda:0')\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"f40c9bf16c9a473ba758a6439dce2652\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:__main__:***** Validation 
results *****\\n\",\n            \"INFO:__main__:avg_train_loss = tensor(0.2954, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:avg_val_loss = tensor(0.0874, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:epoch = 0\\n\",\n            \"\\n\",\n            \"INFO:__main__:loss = tensor(0.0066, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:train_loss = tensor(0.0066, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:val_loss = tensor(0.0874, device='cuda:0')\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"1\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 30\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"l-obOz6v70iB\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"!mkdir t5_base_imdb_sentiment\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"OQBJcrrWi2vC\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"a98adf77-6e23-4304-8ccc-5b13a33a2a32\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 50\n        }\n      },\n      \"source\": [\n        \"## save the model this way so next time you can load it using T5ForConditionalGeneration.from_pretrained\\n\",\n        \"model.model.save_pretrained('t5_base_imdb_sentiment')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n       
 {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:transformers.configuration_utils:Configuration saved in t5_base_imdb_sentiment/config.json\\n\",\n            \"INFO:transformers.modeling_utils:Model weights saved in t5_base_imdb_sentiment/pytorch_model.bin\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"XhjELPOk7-cz\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"# !cp -r t5_base_imdb_sentiment drive/My\\\\ Drive/\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"brPOSAkjNP5t\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"### Eval\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"_7SuVh05lDrJ\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"For inference we will use the `generate` method with greedy decoding with max length 2.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"25jbT49CVoXN\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"import textwrap\\n\",\n        \"from tqdm.auto import tqdm\\n\",\n        \"from sklearn import metrics\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"cyriGR20lSRa\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Let's visualize few predictions on test dataset\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"wwJ998sMz2Ci\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n       
 \"dataset = ImdbDataset(tokenizer, 'aclImdb', 'test',  max_len=512)\\n\",\n        \"loader = DataLoader(dataset, batch_size=32, shuffle=True)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"2LQtN5b90TyW\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"it = iter(loader)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"TRD03teH0YMe\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"d43041e6-5d7d-49d5-e91a-7530c5d1d6b1\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"source\": [\n        \"batch = next(it)\\n\",\n        \"batch[\\\"source_ids\\\"].shape\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"torch.Size([32, 512])\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 36\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"eewDktozk7GN\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"outs = model.model.generate(input_ids=batch['source_ids'].cuda(), \\n\",\n        \"                              attention_mask=batch['source_mask'].cuda(), \\n\",\n        \"                              max_length=2)\\n\",\n        \"\\n\",\n        \"dec = [tokenizer.decode(ids) for ids in outs]\\n\",\n        \"\\n\",\n        \"texts = [tokenizer.decode(ids) for ids in batch['source_ids']]\\n\",\n        \"targets = [tokenizer.decode(ids) for ids in batch['target_ids']]\"\n      ],\n      
\"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"9vBe0UNw7cHY\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"2f0171ac-8d7d-41db-db31-d57bf72bc205\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 1000\n        }\n      },\n      \"source\": [\n        \"for i in range(32):\\n\",\n        \"    lines = textwrap.wrap(\\\"Review:\\\\n%s\\\\n\\\" % texts[i], width=100)\\n\",\n        \"    print(\\\"\\\\n\\\".join(lines))\\n\",\n        \"    print(\\\"\\\\nActual sentiment: %s\\\" % targets[i])\\n\",\n        \"    print(\\\"Predicted sentiment: %s\\\" % dec[i])\\n\",\n        \"    print(\\\"=====================================================================\\\\n\\\")\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"Review: I dont know where to begin Perhaps the whole idea of this movie was just a disaster waiting\\n\",\n            \"to happen There is nothing slightly humorous about a kidnapping I dont know what was more\\n\",\n            \"offensivethe subject matter or David Arquettes performance It was like watching a bull get its penis\\n\",\n            \"cut off although I think the bull felt better afterwards The filmmakers should find something about\\n\",\n            \"Sinatra other than his sons kidnapping to show like I dont know his TALENT AS A SINGER His family\\n\",\n            \"shouldnt have to relive that horror Thank GOD it was just shown on HBO and not released in theaters\\n\",\n            \"Please dont watch this if you have any self respect\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n      
      \"Review: A fine performance by Vittorio Mezzogiorno and a masterful one by JeanHugues Anglade adorn\\n\",\n            \"this stange tale of lust desire and alienation in France The work of the two lead performers is\\n\",\n            \"strikingsubtle intense and passionate Alas the script is deliberately turgid and sordid and the\\n\",\n            \"overall effect leaves one with a downcast spirit Still those who can appreciate fine quality acting\\n\",\n            \"will be able to savor the courageous work of the leads in this often difficult film journey of\\n\",\n            \"Gallic low life\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: I almost stopped watching Hindi movies because of the mediocre quality and story lines One\\n\",\n            \"exception for this is Ramgopal Verma movies This is a nice movie with great performances from the\\n\",\n            \"star cast This is must see movie for those who are sick of watching stupid dancing and love stories\\n\",\n            \"The adaptation of the story and characterization was exceptional goodYou should watch this movie for\\n\",\n            \"Nana Patekar based on the life of Mumbai cop Daya Naik this movie deals in a more realistic way The\\n\",\n            \"film delves into the life of the common man which he has apart from being an encounter specialist I\\n\",\n            \"rate this as one of the best movie of the year\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: This was a really interesting Halloween film I wasnt to thrilled with the whole Thorn 
theory\\n\",\n            \"but it still makes for a good film I liked getting to see Tommy Doyle back but sadly Donald\\n\",\n            \"Pleasance died right after shooting The film had a really REALLY bad director who didnt give a flip\\n\",\n            \"about the series from what I heard treated Donald bad and wouldnt let Danielle Harris come back as\\n\",\n            \"Jamie Its like he was just trying to bring down the film but I still liked it There were alot of\\n\",\n            \"cuts and music changes and if youre lucky you can get the Producers Cut which features over 40 min\\n\",\n            \"of never before scenes With those scenes it turns into a whole new movie Check it out if you have\\n\",\n            \"the chance\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: Cheerleader Massacre was supposed to be the fourth installment of the Slumber Party Massacre\\n\",\n            \"series if thats what they were doing which it is considering ONE actress from the original returns\\n\",\n            \"in a small cameo role they have failed miserably and made by far the worst installment of the\\n\",\n            \"quadrilogy Cheerleader Massacre seamlessly combines bad acting a horrible plot a dumb killer dull\\n\",\n            \"and boring deaths boring scenery and hideous camera work to make it one of the worst films ever made\\n\",\n            \"Did I already mention how bad it was Dont get me wrong this cheesy and retarded excuse for a horror\\n\",\n            \"film is nowhere near as bad as Napoleon Dynamite but it is undeniably a horrible movieCheerleader\\n\",\n            \"Massacre is an exact polar opposite of the original Slumber Party Massacre Stay away by all means\\n\",\n            \"This movie is utter garbage\\n\",\n   
         \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: This movie will give me nightmares I will wake up drenched in sweat screaming I didnt make\\n\",\n            \"this film please dont blame me I honestly think it would have been more entertaining to watch a fat\\n\",\n            \"guy eating lard in his moms basement for a hour or two than to watch this crap I understand money\\n\",\n            \"was tight but goddamn what the hell were they thinking there was no thought plot or effort put into\\n\",\n            \"this This movie needs a warning Please for the love of god dont fund the drama department a the\\n\",\n            \"local JC On an other note these are the least likable characters I have ever seen and I have seen\\n\",\n            \"movies with Hitler in them So lastly take my advice the next time you even think about renting this\\n\",\n            \"just pop a few hundred Adivl and let the sleep come\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: In this movie Virtual Sexuality the 17 year old Justine is not lucky in love One day when\\n\",\n            \"she is stood up she goes with her friend to a virual reality conference there she is introduced with\\n\",\n            \"a machine that can change your look dody and whatever you like in Virtual Reality She decides to try\\n\",\n            \"it out but begins to make a boyfriend of her own her dreamdate Then suddenly there is an explosion\\n\",\n            \"in a gas pipe and her creation comes to life Ill say no more youll have to watch the movie which is\\n\",\n            \"quite 
fun to watch\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: Why does C Thomas Howell do these movies Cruise Howells one time costar does a huge\\n\",\n            \"blockbuster of WOTW and Howell follows with this lame effortWhere do I start here Production Values\\n\",\n            \"Ill start with the good stuff The look and feel of some of the scenes in this movie are not too bad\\n\",\n            \"to be honest The setups are okay in spots and the direction not too badScript Terrible A series of\\n\",\n            \"clunky scenes that could have been put in any order you like permeate throughout the movie The\\n\",\n            \"amount of times the scene faded to black and reemerged a second later in the same room was\\n\",\n            \"uncountable Very poor storyline but so was the Cruise WOTW takes some blame but an abysmal\\n\",\n            \"screenplay kills it offSpecial FX Okay I dont want to be too harsh here as I imagine the budget was\\n\",\n            \"smaller than Cruises lunch bill but in the overall context of the film the effects are badly done\\n\",\n            \"Some shots are quite impressive mainly far off destruction shots of bridges Washington liner But in\\n\",\n            \"the main the alien machines and tentacles themselves are dreadful Also the camera quality is fuzzy\\n\",\n            \"on some shots and cuts away entirely on othersActing Im a fan of Howell but as he has reduced\\n\",\n            \"himself to acting in these lowbudget flicks he has succumbed to the overacting bug a long time ago\\n\",\n            \"Look at his performance in The Hitcher and compare it to this movie There is no comparison He\\n\",\n            \"overdoes his facial expressions his flailing arms and legs where did he get that 
running style and\\n\",\n            \"for a final coupdegras look at the scene where he loses the photo of his family Hysterical But after\\n\",\n            \"saying all that he is still the best actor on show here Busey is embarrassing to look at and Peter\\n\",\n            \"Green Zed is truly dead now baby mumbles incoherently through his one and only scene I honestly\\n\",\n            \"could not understand one word he said I even went so far as trying to enable the subtitles on that\\n\",\n            \"scene but the DVD did not have subtitles This seems to be a real keepitinthefamily affair too as\\n\",\n            \"Howells son the directors wife and the line producer all make it into the film None of them are\\n\",\n            \"goodDirection not bad but not good eitherScore DismalOverall a lame duck effort that will don'thing\\n\",\n            \"for Howell in his\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: Another Asian horror movie packed with intense and creepy moments Another Asian horror\\n\",\n            \"trademark is the complexity of the plot which is here as well MAJOR SPOILER WARNINGThe movie starts\\n\",\n            \"pretty simple two sisters go to live with their dad and stepmother after being put in a mental\\n\",\n            \"institution after their mother hanged herself The sisters seem very hostile towards their mother\\n\",\n            \"especially the elder one and they seem to ignore their father All goes smoothly until the mother\\n\",\n            \"locks the young sister in the wardrobe and the elder sister tells her father Then it hits you your\\n\",\n            \"sister has been dead for years now It turns out the older sister is still not recovered from the\\n\",\n            \"death of her mother and 
what we didnt know is that the wardrobe the mother was hanged in fell on the\\n\",\n            \"younger sister and killed her as wellAs for the stepmother she is the alter ego of the older sister\\n\",\n            \"revealed when the stepmother actually the sisters alter ego is sitting on a couch when the real\\n\",\n            \"stepmother walks in I hope it has been made clearer for confused Asian horror fans out thereFinally\\n\",\n            \"my favourite scene is the scene where the father invites friends over for dinner and one of the\\n\",\n            \"friends starts to choke which erupts into a panic attack Very creepy 7 out of 10\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: As a native of New Orleans I can state that almost everything in this movie from the\\n\",\n            \"atrocious NAwlins dialect to the highly creative manipulation of Crescent City geography is horrible\\n\",\n            \"This is another one of those Big Hollywood movies that decides to stereotype New Orleans as 1 A city\\n\",\n            \"full of Frenchsounding idiots 2 A city full of people who sound as if theyve just returned from\\n\",\n            \"Blanche Dubois summer home 3 A city of drunkards where every day is Mardi Gras 4 A city of deep\\n\",\n            \"mystery where almost everyone practices or is a victim of voodoo I admit that maybe we are a city of\\n\",\n            \"drunkards although every day is NOT Mardi Gras The Big Easy is one of the worst films about New\\n\",\n            \"Orleans I wouldnt recommend it to anybody\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            
\"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: This selfimportant confusing b+w film watches like an infant on a very bad acid trip Youre\\n\",\n            \"dealing with something that reminds you of a piece of rotting lettuce that accidentally fell out of\\n\",\n            \"the back of a garbage truck no one cares to touch it because it will probably be washed away on its\\n\",\n            \"own down the storm drain Theres no room for plot when youve got visceral imagery and subtle allegory\\n\",\n            \"To me it seems like the director tries to make the next great art movie while begging for\\n\",\n            \"intellectual accolades I didnt bring my beret either Watching this I felt almost insulted since the\\n\",\n            \"film does such an effective job of distancing itself from you\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: Mercy the movie actually starts out as a somewhat decent film and ellen barkin does give a\\n\",\n            \"strong performance But if you have read the book and actually got to know the characters and cared\\n\",\n            \"who done it the movie just does not compare It is always hard to brink a book onto film and\\n\",\n            \"unfortunatley this one ends up failing 3 out of 10\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: What in the world This piece of gambling cinema would have been suitable for the Lifetime\\n\",\n            \"Network Michael Imperoli is a good actor but I think his portrayal as Stu 
fell short The montages\\n\",\n            \"were unbearable and too many The supporting cast where are you Whoever did the casting should be\\n\",\n            \"partially at fault The cinematography was useless A gambling story with an after school feel to it\\n\",\n            \"Stories of this sort should be left for the Oliver Stones of the world It would still suck ass but\\n\",\n            \"at least it would be fun to watch It was an attempt that lost its wheels before the race ever begun\\n\",\n            \"Mario Andretti in the 1982 Indy 500 came to mind\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: This movie definitely made me laugh but that doesnt mean it was exactly funny Well then\\n\",\n            \"again me and my friends had a lot of fun watching itI doubt there is anything about this movie that\\n\",\n            \"hasnt been done at least twice before just like the plot itself All of the characters are overused\\n\",\n            \"movie cliché cardboardbox roles that dont even require acting skills accordingly such skills are not\\n\",\n            \"delivered We have the corrupt cop a ruthless killer who claims to care about his men and their\\n\",\n            \"families whilst caring nothing about people he shoots in the forehead at so close a range as to have\\n\",\n            \"blood spat on his face We have the wornout cop on the edge so nicely pointed at in the discussion\\n\",\n            \"boards of this movie we have the old onedayawayfromretirementcop who just about everyone must have\\n\",\n            \"immediately identified as the most likely man on the inside since he had most to gain and he didnt\\n\",\n            \"utter a trustworthy word throughout the movie About as seethrough as a glass house on a sunny 
day\\n\",\n            \"The big black gangster king was a copy of all previous big black gangster kings in movie history\\n\",\n            \"they couldve just called him Marcellus Wallace but just slightly tougher and more ruthless because\\n\",\n            \"something has to emphasize that we also know Laurence Fishburne from actually good movies Then we\\n\",\n            \"finally have the HIGHLY EDUCATED doctor who cant think of anything reasonable to do as soon as the\\n\",\n            \"situation differs from her ordinary life and who spends the majority of the movie sitting in a\\n\",\n            \"corner helplessly trying to figure out how to hold on to the weapon she was given NOT USING ITThe\\n\",\n            \"whole siege story is not interesting not original having been used twice before and this movie\\n\",\n            \"manages to add absolutely nothing interesting to it There is the initial probe then the laying of\\n\",\n            \"the siege then the assault then the escape attempts Meanwhile a bunch of strained stressed freaked\\n\",\n            \"out cops and thugs manage to hold off a Police assault team with hightech equipment and the quite\\n\",\n            \"important advantage of VISION Then again in deep night with the power cut and with a snow storm\\n\",\n            \"raging overhead there is definitely a lot of light coming in so who really cares about night\\n\",\n            \"visionBut the best part comes right at the end In the first scenes showing Precinct 13 we see it is\\n\",\n            \"situated in an outskirt of an industrial city factories and office buildings surround it on all\\n\",\n            \"sides From this point\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: Some people say this show was good 
in its early years I disagree with all of em The show is\\n\",\n            \"just plain stupid and pathetic My mum hates it I hate it my dad hates it I dont know about my sister\\n\",\n            \"but oh well Here some reasons why1 THE CHARACTERS Babies being used as grown up style characters are\\n\",\n            \"stupid The babies are just precocious and annoying The grown ups and adults are dumb and unappealing\\n\",\n            \"The worst character is that Angelica Pickles she really does it in for your ear drums when you had a\\n\",\n            \"long hard and miserable day at the office and also that Kimi Finster who appears later on she is too\\n\",\n            \"over optimistic and a pain in the butt She cant decided whither she is French or Japanese it doesnt\\n\",\n            \"matter know you are a American Citizen know and thats that Oh what am I talking about all the\\n\",\n            \"characters from this show suck2 THE STORIES The stories are unoriginal and dumb The make it like the\\n\",\n            \"babies go off on a great adventure yeah to the back yard shed In one episode that little goofy brat\\n\",\n            \"Tommy Pickles the Leader broke in to a televisions control room and literally almost destroyed it\\n\",\n            \"Dont give kids any idea to smash up normal TV Stations control rooms they pay a awful lot of money\\n\",\n            \"for them in real life I can imagine what the broadcasters must of felt like airing this episode they\\n\",\n            \"will probably start staring at their machines throughout the day scared that a baby will brake in\\n\",\n            \"Sad3 OVER RATED The show has been dragging on for years now and people are still making up stories\\n\",\n            \"and new series and spinoffs for this Get off The Simpsons have been going for nearly the same amount\\n\",\n            \"of time as this but they are much better and funnier than babies The show is just plain over rated\\n\",\n     
       \"People where is your common senseAnyway I surprised TV Stations across the world want to air this\\n\",\n            \"series even off today The show is utter junk and should have never been produced The two movies for\\n\",\n            \"this cartoons sucked just the same 210\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: What can I saynot much to this one at all Pretty dull and uninterestingThe actors\\n\",\n            \"performances are just OK The only one that shines in any way is Simmons but he only has maybe 3\\n\",\n            \"scenes I understand that by keeping his screen time to a minimum he retains the mysterious psychic\\n\",\n            \"aura he has but I cant help but feel his talent was wasted No one else rose above mediocreThe story\\n\",\n            \"itself seems like it may be intriguing at the beginning but then just doesnt go anywhere There wasnt\\n\",\n            \"a single scene in the movie that impressed me or made me feel like I had just seen something special\\n\",\n            \"The cinematography was fairly blandI mean desert in a washed out sort of sepianot very inspiringThe\\n\",\n            \"story of his childhood pal back outta prison seemed only partially thought out and didnt really add\\n\",\n            \"anything to the story other than making an average Twilight Zone script into a full length\\n\",\n            \"featureDrab\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: I kept watching it because it seemed like the plot was going somewhere When it ambiguously\\n\",\n            
\"got there I was very disappointed Im going to tell you what really happened in the next sentence But\\n\",\n            \"maybe I wont Maybe Ill just imply something will happen The writers lacked any imagination This is\\n\",\n            \"not even a B movie its a made for TV B movie\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: Straight up I love this film I love everything about it It has a great soundtrack it has a\\n\",\n            \"lot of recognizable faces and it is funny as hell There are so many plots in this film and every one\\n\",\n            \"of them is funny in one way or anotherWhere as Spicolli lit up the screen two years back Drake is\\n\",\n            \"almost as memorable of a character All he wants to do is have fun He moves out of the house without\\n\",\n            \"his parents consent he skips work whenever he feels like it he is obsessed with sex he loves his\\n\",\n            \"drugs and booze and he tries to be a good friend It is his lacksidaisical attitude that makes him\\n\",\n            \"such a joy to watch And he comes out with some great lines And there are so many tiny observations\\n\",\n            \"that you dont see coming but they make you laugh at the sheer velocity when it hits you One\\n\",\n            \"particular moment is when Tommy and Bill are talking about Bills ex girlfriend dating someone else\\n\",\n            \"now At the end of the conversation Tommy takes his huge beer bottle and just throws it over his\\n\",\n            \"shoulder casually He then says good night and the scene ends It is a perfect scene Tommys world is\\n\",\n            \"his own He really lives to party and have fun When the conversation is over his time is over and he\\n\",\n            \"doesnt care who he 
offends in the process He has an innocence about him Its casual is his favourite\\n\",\n            \"sayingAnother such classic scene is Reggie handing Bill a donut He says something to him that me and\\n\",\n            \"my friends will never forget because we rewound the film ten times and watched that part over and\\n\",\n            \"over again and hurt ourselves laughing It has to be seen to be appreciatedWild Life is a throw back\\n\",\n            \"to when teen comedies were funny raunchy had a good ear entertained us and just wanted us to get\\n\",\n            \"lost in their world for 90 minutes Wild Life does all those things perfectly If this is a film that\\n\",\n            \"you havent seen give it a chance It is a classicAlso check out the army store guy that Jim has\\n\",\n            \"problems with He is a very familiar face now and it is his first role on the big screen\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: This movie is a real shame not just for the plotthe empty performance of the characters it\\n\",\n            \"is for the lack of creativity from the director and all the crew this is maybe one of the worst\\n\",\n            \"movies of all timesand it is hard to believe that is the sequel of one of the most famous movies of\\n\",\n            \"the 90sI am a great fan of The Mask when I went to see this movie I was expecting to a movie with a\\n\",\n            \"good sense of humor a movie with a acceptable plot instead I saw a really bad copy of Chuck Jones\\n\",\n            \"and Tex Avery cartoons the movie was not funny even for my 7 years old sister so I wonderWhat was\\n\",\n            \"wrong New Line CinemaWas it trying to repeat the success of the first movie or was it trying to\\n\",\n            
\"create another masterpiece like The Lord of the RingsBecause if they did they were completely out of\\n\",\n            \"their minds\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: eXistenZ is simply David Cronenbergs best movie All the people compare it to the Matrix\\n\",\n            \"Theyre not even similar If you enjoyed Cronenbergs other works just a little bit youll love this one\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: First time I ever felt I needed to write a reviewI have seen thousands of movies in my life\\n\",\n            \"and I like a wide range of movies I am reasonably opened minded and can easily say I enjoyed a movie\\n\",\n            \"while still saying yeah it was not good but I enjoyed it I can appreciate the mastery of great\\n\",\n            \"movies like The Shaw shank redemption the godfather and American history X I can like good movies in\\n\",\n            \"a genre like horror or comedy even if the movie might not be that great I can even enjoy a bad movie\\n\",\n            \"that just happens to entertain me Bloodsport I also will try to rate movie fairly even if I did not\\n\",\n            \"like it City lights by Charlie Chapin was not a movie I enjoyed but I can appreciate the acting and\\n\",\n            \"story lines for the timeI think some people when they go on this site instead of randomly click a\\n\",\n            \"rating should take a few ideas into account Try to rate the movie based on how good it actually was\\n\",\n            \"Do not let your personal bias affect the rating 
Also look at other moves you rated and compare the\\n\",\n            \"movie you are going to rateThis movie was the worst piece of trash I have ever seen 2 hours of my\\n\",\n            \"life where just stolen The acting was awful across the board The scenes where choppy at best However\\n\",\n            \"the real disgrace was the story The first 20 minutes we actually had a story that tried to make\\n\",\n            \"sense and take the viewer from point A to B However after that it was a nightmare They kept trying\\n\",\n            \"to add new elements but nothing was every explained Nothing really ever made sense was steward dead\\n\",\n            \"is he alive did he hit by lighting was it really lighting was it aliens is he an alien etc The\\n\",\n            \"ending tied nothing together and really did not answer any questions The only positive was nobody\\n\",\n            \"cared we where just happy to leave the theater6510 What is wrong with some of you I will admit that\\n\",\n            \"the 8 of us where so mad about seeing this we did think what would make it better and we decide to\\n\",\n            \"tell a few of our friends that this movie was good so they would have to suffer and see this movie\\n\",\n            \"What can I say misery loves company That is really the only reason I can see for a 65 ratingDo not\\n\",\n            \"waste your life\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: My main criticism with the movie is the animation I totally agree with everyone else it was\\n\",\n            \"very poor Some of the characters seemed to have darker skin tones than they did in the first film\\n\",\n            \"which is much better Also the background colours looked rushed and somewhat static It is also 
a\\n\",\n            \"shame that Michael JFox didnt voice Milo he did such a good job and James Arnold Taylor wasnt sure\\n\",\n            \"whether he was supposed to sound like Milo or Aladdin I have also taken into consideration the lack\\n\",\n            \"of a good storyline the third story was confusing and clumsily told and the second story suffered\\n\",\n            \"from poor scripting To make things worse the first one I cant even remember other than a fishing\\n\",\n            \"village being haunted or something like that However there was some nice music and good voice\\n\",\n            \"talents from John Mahoney Cree Summer Clancy Brown and Tom Wilson that saved the film from total\\n\",\n            \"disaster All in all a disappointing sequel to a surprisingly good film 410 Bethany Cox\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: Giant Robot was the most popular Japanese TV serial ever seen on Indian TV It was targeted\\n\",\n            \"to children and we saw a robot for the first time in our life Many Indian children must have even\\n\",\n            \"seen a machine for the first time outside the school textbooks The serial also showed a child in an\\n\",\n            \"adults organization fighting evil No doubt many of us who have seen Giant Robot in our childhood\\n\",\n            \"long for our own robots and as a stopgap arrangement look upon our computers in the same way This\\n\",\n            \"show also portrayed ideal adults referring at Jerry Johnnys buddy friend and Unicorn chief Azuma We\\n\",\n            \"grew to respect Japanese progress and still view Japan as the ideal Asian nationBTW at that time\\n\",\n            \"there were no satellite TV channels in India and the govt owned broadcaster did not 
show much of\\n\",\n            \"Disney cartoons I guess that was how child serials like giant Robot got appreciated Nowadays there\\n\",\n            \"is Pokemon etc but they are no so fascinating or alluring as Giant robot\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: This might sound weird but I only got to see the first movie The Emperors New Groove\\n\",\n            \"yaddayadda a week ago and only because of one episode of the TV show I simply adore Kuzcos character\\n\",\n            \"but Kronk isnt that bad either Anyway eventually I decided to watch the second film just so I\\n\",\n            \"wouldve seen it Hoped it would be as good as the first one but Im sorry to tell this but the more\\n\",\n            \"the humour got American the more I yawned I agreed with Kuzco when he started crying seeing all the\\n\",\n            \"cheesy footageStill younger kids and probably veterans too will love this movie to bits if they like\\n\",\n            \"the old school moralising Disney that is but I just had expectations that were an eensy teensy\\n\",\n            \"little bit hell of a lot higher than they shouldve been Kronk is a lovely character being good\\n\",\n            \"hearted and dumb all at once but it were Pacha and Kuzco in drag that woke me up at the end of the\\n\",\n            \"movie Ill ignore Rudy for as far thats possibleAnyway great movie just not my style and as they say\\n\",\n            \"you always have to be true to your groove\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: The problem I have as a 
Finn is that most of the actors in this movie are in every Finnish\\n\",\n            \"movie I have a feeling that Finland has only like five actors I think that if youre not from Finland\\n\",\n            \"you really like this movie as a refreshing noveltyThis movie is about a dreadful chain of events\\n\",\n            \"that affects a few people quite harshly Alcoholism cold climate and darkness may all be clichés but\\n\",\n            \"theyre still very real in todays Finnish society A lot of people in Finland have depression\\n\",\n            \"especially during winterThe tone of the movie is very melancholic I enjoyed it and Louhimes\\n\",\n            \"directing was again very solid I liked this movie a lot only negative thing is that you see the same\\n\",\n            \"faces that youve seen over and over again\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: Most of the criticism of Attack of Show is from people who are unfairly comparing it to an\\n\",\n            \"old computer TV program called The Screen Savers People are upset because G4 decided to cancel the\\n\",\n            \"Screen Savers and replace it with the pop culture based Attack of the Show To compare the two shows\\n\",\n            \"is like comparing apples to orangesAttack of the Show is a unique hour long program that covers\\n\",\n            \"current Generation XY culture It features segments on moviestelevision panel discussions video games\\n\",\n            \"new DVD releases sex advice new gadgets MP3 players cell phones etc comic booksgraphic novels\\n\",\n            \"magazines and internet fads Its a fun show definitely worth checking out you are in your 20s or 30s\\n\",\n            \"I give it an 8 out of 10\\n\",\n            \"\\n\",\n            
\"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: This is the best comedy period It is so underrated Clever witty humor Great casting Jerry\\n\",\n            \"Stiller is the jewel in the show he is so incredibly funny and quirky simply a comical genius Doug\\n\",\n            \"and Carrie have great chemistry I so don't see what the hype is about when it comes to Everybody\\n\",\n            \"loves Raymond it is SO overrated with lame jokes mostly forced humor and just not the witty show I\\n\",\n            \"cant remember laughing in more than 1 episode King of Queens is a rare comedy that has all the right\\n\",\n            \"ingredients to give you serious belly laughs which is normally caused by Arthur Spooner I think its\\n\",\n            \"about time this comedy gets the hype it deserves and not the lame Raymond & CO\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: Running Out of Time rests somewhere in the middle of Johnny Tos cannon in the solid good\\n\",\n            \"category As a crime thriller its not terribly original or overwhelming and the action scenes will\\n\",\n            \"not blow you away but it has something else going for it Its a Johnny To film after all it has\\n\",\n            \"toAndy Lau has 72 hours to live He decides to play a strange catandmouse game with a hostage\\n\",\n            \"negotiator of the HK police played by Lau Ching Wan Thats the plot in a nutshell On top of that To\\n\",\n            \"piles layers of twists and turns that keep proceedings interesting throughout It occasionally\\n\",\n            \"becomes too convoluted for 
its own sake but never lets it get the best of it However just as Johnny\\n\",\n            \"To is about to hand over a slick and wellmade crime flick which lets face it are dimeadozen he slips\\n\",\n            \"in bits and pieces that bring Running Out of Time alive as a full emotional experience providing the\\n\",\n            \"soul and heart to the welloiled skeletonThe concepts of synchronism and minimalism staples in his\\n\",\n            \"work are explored in great effect here Always subtle letting the images speak for themselves giving\\n\",\n            \"them time to develop with long takes and slow tracking shots exemplary cutting to the score its all\\n\",\n            \"here A small love story in a bus between Andy Lau and a girl is among the highlights of the film and\\n\",\n            \"part of the heart Im talking about So simple yet so powerful Ditto for Laus and Lau Ching Wans car\\n\",\n            \"scenes and the bowlingroom showdownHowever something stops me from claiming Running Out of Time is a\\n\",\n            \"masterpiece To has all the ability and craftmanship down to a notch but he can also be too\\n\",\n            \"workmanlike or bland at times When hes good hes REAL good There are even isolated moment of pure\\n\",\n            \"brilliance that are just TOO good for their own sake leaving a bittersweet aftertaste for the rest\\n\",\n            \"of the movie Im convinced that if he puts his heart to it he can make a really great film As it is\\n\",\n            \"this is another one of his films that is flawed but enjoyable Underneath the slick HK style its the\\n\",\n            \"black humour and heartfelt drama that makes this a compelling film Worth watching definitely\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            
\"Review: Will there be please coming an end to hyping movies that are dealing about social conflicts\\n\",\n            \"or other human disasters Okay Care is about childabuse Care is about perverts misusing boys in a\\n\",\n            \"school and how disgusting it might be if its a movie with a poor script and made with bad playing\\n\",\n            \"actors then it stays a bad movie Care is a movie that could have been but is it because it was a\\n\",\n            \"tvmovie I dont know but everything seemed so limited that it comes over as some cheap movie that\\n\",\n            \"will be seen by some housewifes and fathers who decide not to go to bed There are so many unanswered\\n\",\n            \"things in this moviethe relation with his mother for instance or the death of some abused boy from\\n\",\n            \"which we know nothing more Care should have been much much better\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: This movie appears to have been overlooked by everyone Someone should bring it out on VHS\\n\",\n            \"and DVD It is an excellent film and far superior to the one with Brooke Shields which was terrible\\n\",\n            \"Jean Simmons deserves more credit than she is getting now days It would be nice if all her films\\n\",\n            \"were offered on VHS or DVD Jean Simmons was and still is a very good actress She certainly was a\\n\",\n            \"beauty In fact she is still a beauty She also has done extremely well on TV She is so much better\\n\",\n            \"than many of the actors today\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            
\"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: When I heard there was to be an ABC Australian Broadcasting Corporation miniseries based on\\n\",\n            \"life in Changi WWII POW camp with a focus on elements of comedy I was deeply sceptical and somewhat\\n\",\n            \"criticalMy father had served in the second world war Such was the barbarity of the Japanese he was\\n\",\n            \"able to talk about the horrors in and around Labuan where he was stationed until only quite recently\\n\",\n            \"Along with my father I had been awarded the fortune of knowing many great men of stronger character\\n\",\n            \"and spirit than I shall ever have who had witnessed acts of unspeakable barbarity at the hands of\\n\",\n            \"the Empire of Japan and had never completely recovered The name Changi is destined to conjure\\n\",\n            \"horrific images for ages to comeBut upon viewing I was highly impressed with the cast the characters\\n\",\n            \"and the complex plotlines of this wonderful series I now regard Changi as the highlight of my week\\n\",\n            \"bear in mind I have viewed only three episodes so far I hope the remaining episodes adhere to the\\n\",\n            \"standards set by the first threeThe black humour works uncannily well however the flatulence jokes\\n\",\n            \"are a little overdone and while much of the horror has been suppressed the series comes quite close\\n\",\n            \"in relaying the undaunted spirit of the survivors who were able to later continue with their lives\\n\",\n            \"in spite of the inhibiting memoriesThe flashback format of this series will be difficult for some to\\n\",\n            \"followbut I can not think of no better way to do adequate justice to the men who suffered deep\\n\",\n            \"emotional scarring proceeding internment when painfully suppressed experiences are 
remembered\\n\",\n            \"sometimes years after the horrorOne of the darkest chapters of the Second World War the 20th century\\n\",\n            \"and I would go so far as to say in the history of mankind is being relayed to a new generation\\n\",\n            \"through this series and I hope it serves to relay the overwhelming adversity borne by the wartime\\n\",\n            \"generationProceeding Changi I dont think I shall ever be able to listen to the poignant tune on the\\n\",\n            \"road to Gundagai in the same way again Tune in\\n\",\n            \"\\n\",\n            \"Actual sentiment: positive\\n\",\n            \"Predicted sentiment: positive\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"Review: I was pretty enthusiastic about seeing this movie when it came out Commercials for it made\\n\",\n            \"it look quirky and I generally like Morgan Freeman and Chris Rock and the combination of the two\\n\",\n            \"seemed like an interesting idea Sadly I was terribly disappointed with Nurse BettyPersonally Ive\\n\",\n            \"usually found that graphic violence and comedy dont go all that well together and the only directors\\n\",\n            \"that have ever combined the two successfully in my opinion are Tarantino and the Coens There isnt\\n\",\n            \"that much violence in Nurse Betty but what violence is in it made me feel kind of weird when I was\\n\",\n            \"supposed to laugh Of course for me part of the problem was also that there didnt seem to be many\\n\",\n            \"places where I was being asked toThe film doesnt much work as a drama either Renee Zellwegers Betty\\n\",\n            \"the storys protagonist is clinically insane and impossible to relate to in any real way I will say\\n\",\n            \"Zellweger acts the role quite well and Freeman Rock and Greg Kinnear all do good jobs too The\\n\",\n           
 \"problem is in the writing Freeman is the only person that gets to play an interesting character Its\\n\",\n            \"really too bad 310\\n\",\n            \"\\n\",\n            \"Actual sentiment: negative\\n\",\n            \"Predicted sentiment: negative\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"lATfuiHYHq_1\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Now predict on all the test dataset\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"lvWQGLXhzHtn\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"c0f5490b-2ade-4795-fa3d-1f0f1746e23c\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 66,\n          \"referenced_widgets\": [\n            \"6aaf51cb9ad44c94b6a174a8768904f7\",\n            \"51d23e1199274477a69557c74609afb2\",\n            \"029f74818c6842d7a28af62032418880\",\n            \"8db144e9144141779a1088c4bc000a99\",\n            \"210517aede4f4cfab9120fdeb3d8361a\",\n            \"df9bc2dc2b3c4fee98affdd7f5ca1ef6\",\n            \"b684a47485af4cb1934d57cbb03a4f57\",\n            \"942d20b134964d1d895af69938918464\"\n          ]\n        }\n      },\n      \"source\": [\n        \"loader = DataLoader(dataset, batch_size=32, num_workers=4)\\n\",\n        \"model.model.eval()\\n\",\n        \"outputs = []\\n\",\n        \"targets = []\\n\",\n        \"for batch in tqdm(loader):\\n\",\n        \"  outs = model.model.generate(input_ids=batch['source_ids'].cuda(), \\n\",\n        \"                              attention_mask=batch['source_mask'].cuda(), \\n\",\n        \"                              max_length=2)\\n\",\n        \"\\n\",\n        \"  dec = 
[tokenizer.decode(ids) for ids in outs]\n\",\n        \"  target = [tokenizer.decode(ids) for ids in batch[\\\"target_ids\\\"]]\\n\",\n        \"  \\n\",\n        \"  outputs.extend(dec)\\n\",\n        \"  targets.extend(target)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"6aaf51cb9ad44c94b6a174a8768904f7\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, max=782.0), HTML(value='')))\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"ZBxEcXeWGafd\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Let's check if the model generates any invalid text\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"Y_qylwYGXgwY\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"for i, out in enumerate(outputs):\\n\",\n        \"  if out not in ['positive', 'negative']:\\n\",\n        \"    print(i, 'detected invalid prediction')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"MpU_VkFGIgnw\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"This is great! Our model hasn't generated any invalid prediction. 
Let's calculate accuracy and other metrics\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"EdJcQODoOChP\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"22fc6852-5443-43e4-d87e-5a5266ddffd9\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"source\": [\n        \"metrics.accuracy_score(targets, outputs)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"0.94712\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 41\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"YepnSgI5OKti\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"a2914edf-d572-4166-a886-6c0d731835e5\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 168\n        }\n      },\n      \"source\": [\n        \"print(metrics.classification_report(targets, outputs))\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"              precision    recall  f1-score   support\\n\",\n            \"\\n\",\n            \"    negative       0.95      0.95      0.95     12500\\n\",\n            \"    positive       0.95      0.95      0.95     12500\\n\",\n            \"\\n\",\n            \"    accuracy                           0.95     25000\\n\",\n            \"   macro avg       0.95      0.95      0.95     25000\\n\",\n            \"weighted avg       0.95      0.95      0.95     25000\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n     
 \"metadata\": {\n        \"id\": \"UcZqrJELrRVw\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"Dhqigmiw2hVh\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"## Emotion classification\\n\",\n        \"\\n\",\n        \"While most of the sentiment-analysis datasets are binary with 'postive' and 'negative' sentiments, [Elvis Saravia](https://twitter.com/omarsar0)  has put together a great [dataset](https://github.com/dair-ai/emotion_dataset) for emotion recognition. The task is given some text classifiy the text into one of the following six emotions \\n\",\n        \"\\n\",\n        \"'sadness', 'joy', 'anger', 'fear', 'surprise', 'love'.\\n\",\n        \"\\n\",\n        \"Here's the [original notebook](https://colab.research.google.com/drive/1nwCE6b9PXIKhv2hvbqf1oZKIGkXMTi1X#scrollTo=pSzoz9InH0Ta) which trains ROBERTa model to classify the text\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"0B4IhzEgO21B\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"### Download and view data\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"6eQhtsD65svj\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"a46f0a9a-27bb-4d10-c7a3-b45b3c894526\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 1000\n        }\n      },\n      \"source\": [\n        \"!wget https://www.dropbox.com/s/ikkqxfdbdec3fuj/test.txt\\n\",\n        \"!wget https://www.dropbox.com/s/1pzkadrvffbqw6o/train.txt\\n\",\n        \"!wget https://www.dropbox.com/s/2mzialpsgf9k5l3/val.txt\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          
\"output_type\": \"stream\",\n          \"text\": [\n            \"--2020-05-09 06:38:34--  https://www.dropbox.com/s/ikkqxfdbdec3fuj/test.txt\\n\",\n            \"Resolving www.dropbox.com (www.dropbox.com)... 162.125.9.1, 2620:100:601f:1::a27d:901\\n\",\n            \"Connecting to www.dropbox.com (www.dropbox.com)|162.125.9.1|:443... connected.\\n\",\n            \"HTTP request sent, awaiting response... 301 Moved Permanently\\n\",\n            \"Location: /s/raw/ikkqxfdbdec3fuj/test.txt [following]\\n\",\n            \"--2020-05-09 06:38:34--  https://www.dropbox.com/s/raw/ikkqxfdbdec3fuj/test.txt\\n\",\n            \"Reusing existing connection to www.dropbox.com:443.\\n\",\n            \"HTTP request sent, awaiting response... 302 Found\\n\",\n            \"Location: https://ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com/cd/0/inline/A3U44u6Qw37AC-ysKv8gHOrYJywB8BLj-SWf4UtgNCVbhch6g7hz1JW0yVUjMcYZGe1daItFDuwZIfhuumccC7WN93mOzuubDPQ-xL4xEAH-ugpp_5KfcQCc-l4yNej1pUo/file# [following]\\n\",\n            \"--2020-05-09 06:38:35--  https://ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com/cd/0/inline/A3U44u6Qw37AC-ysKv8gHOrYJywB8BLj-SWf4UtgNCVbhch6g7hz1JW0yVUjMcYZGe1daItFDuwZIfhuumccC7WN93mOzuubDPQ-xL4xEAH-ugpp_5KfcQCc-l4yNej1pUo/file\\n\",\n            \"Resolving ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com (ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com)... 162.125.9.6, 2620:100:601f:6::a27d:906\\n\",\n            \"Connecting to ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com (ucee0a4eb59e9a79892dc4e0e239.dl.dropboxusercontent.com)|162.125.9.6|:443... connected.\\n\",\n            \"HTTP request sent, awaiting response... 
200 OK\\n\",\n            \"Length: 206760 (202K) [text/plain]\\n\",\n            \"Saving to: ‘test.txt’\\n\",\n            \"\\n\",\n            \"\\rtest.txt              0%[                    ]       0  --.-KB/s               \\rtest.txt            100%[===================>] 201.91K  --.-KB/s    in 0.07s   \\n\",\n            \"\\n\",\n            \"2020-05-09 06:38:35 (2.79 MB/s) - ‘test.txt’ saved [206760/206760]\\n\",\n            \"\\n\",\n            \"--2020-05-09 06:38:37--  https://www.dropbox.com/s/1pzkadrvffbqw6o/train.txt\\n\",\n            \"Resolving www.dropbox.com (www.dropbox.com)... 162.125.9.1, 2620:100:601f:1::a27d:901\\n\",\n            \"Connecting to www.dropbox.com (www.dropbox.com)|162.125.9.1|:443... connected.\\n\",\n            \"HTTP request sent, awaiting response... 301 Moved Permanently\\n\",\n            \"Location: /s/raw/1pzkadrvffbqw6o/train.txt [following]\\n\",\n            \"--2020-05-09 06:38:38--  https://www.dropbox.com/s/raw/1pzkadrvffbqw6o/train.txt\\n\",\n            \"Reusing existing connection to www.dropbox.com:443.\\n\",\n            \"HTTP request sent, awaiting response... 302 Found\\n\",\n            \"Location: https://uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com/cd/0/inline/A3VHSDKXCes7IMws7cQCAbiXyNW7dOk9CYhiTjzghv3EcDtNVLX37OVjW43i4mNdbrYOdNNhfqFlbysOgj9PUEvDo4b1Uq_2cChy-FGiz0-mNYIc07lv7AoBSphOulwSRY8/file# [following]\\n\",\n            \"--2020-05-09 06:38:38--  https://uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com/cd/0/inline/A3VHSDKXCes7IMws7cQCAbiXyNW7dOk9CYhiTjzghv3EcDtNVLX37OVjW43i4mNdbrYOdNNhfqFlbysOgj9PUEvDo4b1Uq_2cChy-FGiz0-mNYIc07lv7AoBSphOulwSRY8/file\\n\",\n            \"Resolving uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com (uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com)... 
162.125.9.6, 2620:100:601f:6::a27d:906\\n\",\n            \"Connecting to uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com (uce8d13a7ac2bd3bc99f493e9cdc.dl.dropboxusercontent.com)|162.125.9.6|:443... connected.\\n\",\n            \"HTTP request sent, awaiting response... 200 OK\\n\",\n            \"Length: 1658616 (1.6M) [text/plain]\\n\",\n            \"Saving to: ‘train.txt’\\n\",\n            \"\\n\",\n            \"train.txt           100%[===================>]   1.58M  --.-KB/s    in 0.1s    \\n\",\n            \"\\n\",\n            \"2020-05-09 06:38:38 (13.8 MB/s) - ‘train.txt’ saved [1658616/1658616]\\n\",\n            \"\\n\",\n            \"--2020-05-09 06:38:41--  https://www.dropbox.com/s/2mzialpsgf9k5l3/val.txt\\n\",\n            \"Resolving www.dropbox.com (www.dropbox.com)... 162.125.9.1, 2620:100:601f:1::a27d:901\\n\",\n            \"Connecting to www.dropbox.com (www.dropbox.com)|162.125.9.1|:443... connected.\\n\",\n            \"HTTP request sent, awaiting response... 301 Moved Permanently\\n\",\n            \"Location: /s/raw/2mzialpsgf9k5l3/val.txt [following]\\n\",\n            \"--2020-05-09 06:38:41--  https://www.dropbox.com/s/raw/2mzialpsgf9k5l3/val.txt\\n\",\n            \"Reusing existing connection to www.dropbox.com:443.\\n\",\n            \"HTTP request sent, awaiting response... 
302 Found\\n\",\n            \"Location: https://uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com/cd/0/inline/A3Uqwy1biv6Ipmcdb5FAQtm-d1nMaHHQJKrKTqu-TusvptAdDtwpPRPxMIuZovISOPSJPhwNP1imjPtokJO3KO6OlofN61eqKzGDn-P7BovjRs9wkVRJW0HjMMaz8Q5vmGU/file# [following]\\n\",\n            \"--2020-05-09 06:38:42--  https://uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com/cd/0/inline/A3Uqwy1biv6Ipmcdb5FAQtm-d1nMaHHQJKrKTqu-TusvptAdDtwpPRPxMIuZovISOPSJPhwNP1imjPtokJO3KO6OlofN61eqKzGDn-P7BovjRs9wkVRJW0HjMMaz8Q5vmGU/file\\n\",\n            \"Resolving uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com (uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com)... 162.125.9.6, 2620:100:601f:6::a27d:906\\n\",\n            \"Connecting to uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com (uccfba89432e5bdbb64e910d3444.dl.dropboxusercontent.com)|162.125.9.6|:443... connected.\\n\",\n            \"HTTP request sent, awaiting response... 200 OK\\n\",\n            \"Length: 204240 (199K) [text/plain]\\n\",\n            \"Saving to: ‘val.txt’\\n\",\n            \"\\n\",\n            \"val.txt             100%[===================>] 199.45K  --.-KB/s    in 0.07s   \\n\",\n            \"\\n\",\n            \"2020-05-09 06:38:42 (2.75 MB/s) - ‘val.txt’ saved [204240/204240]\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"yVrcVbvx74G5\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"!mkdir emotion_data\\n\",\n        \"!mv *.txt emotion_data\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"jOpnh3Y06BGU\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"train_path = \\\"emotion_data/train.txt\\\"\\n\",\n        \"test_path = 
\\\"emotion_data/test.txt\\\"\\n\",\n        \"val_path = \\\"emotion_data/val.txt\\\"\\n\",\n        \"\\n\",\n        \"## emotion labels\\n\",\n        \"label2int = {\\n\",\n        \"  \\\"sadness\\\": 0,\\n\",\n        \"  \\\"joy\\\": 1,\\n\",\n        \"  \\\"love\\\": 2,\\n\",\n        \"  \\\"anger\\\": 3,\\n\",\n        \"  \\\"fear\\\": 4,\\n\",\n        \"  \\\"surprise\\\": 5\\n\",\n        \"}\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"r4sDek6T8PXE\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"a061ba43-03d8-4fdc-b715-b6fca8d57388\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 313\n        }\n      },\n      \"source\": [\n        \"data = pd.read_csv(train_path, sep=\\\";\\\", header=None, names=['text', 'emotion'],\\n\",\n        \"                               engine=\\\"python\\\")\\n\",\n        \"data.emotion.value_counts().plot.bar()\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"<matplotlib.axes._subplots.AxesSubplot at 0x7f20bb40cfd0>\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 49\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAX0AAAEXCAYAAABBFpRtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAWn0lEQVR4nO3df9AlVX3n8fdHEH/EH6BMKATiYJzoYlQ0I7Ar2SiEX6JCFA2uP0ZDMlUuRuO6u2JWl4piCmNKd01FVhQiuolINCoLKk5Q1qhRGBBBUMKIUMKiTBwEIwEFv/tHnwevOMPzDHPn9sw971fVU7f7dN/b32a4n9v39Om+qSokSX2439gFSJJmx9CXpI4Y+pLUEUNfkjpi6EtSRwx9SerIjmMXcG923XXXWr58+dhlSNJ25eKLL/7nqlq2sWXbdOgvX76ctWvXjl2GJG1Xkly3qWV270hSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6sk1fnHVfLT/h3Jlu79qTj5zp9iTpvvJIX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHlhT6Sa5NcnmSS5OsbW2PSLImydXtcZfWniTvSrIuyWVJnjrxOqva+lcnWbV1dkmStCmbc6T/zKrat6pWtvkTgPOragVwfpsHOAJY0f5WA6fA8CEBnAjsD+wHnLjwQSFJmo0t6d45CjijTZ8BHD3R/oEafBnYOcnuwGHAmqraUFU3A2uAw7dg+5KkzbTU0C/gM0kuTrK6te1WVTe26e8Cu7XpPYDvTDz3+ta2qfafk2R1krVJ1q5fv36J5UmSlmKpt1Y+sKpuSPLLwJok35xcWFWVpKZRUFWdCpwKsHLlyqm8piRpsKQj/aq6oT3eBHyMoU/+e63bhvZ4U1v9BmCviafv2do21S5JmpFFQz/JLyV56MI0cCjwdeBsYGEEzirgE236bOBlbRTPAcAtrRvoPODQJLu0E7iHtjZJ0owspXtnN+BjSRbW/5uq+nSSi4CzkhwHXAe8sK3/SeBZwDrgNuAVAFW1IclbgIvaem+uqg1T2xNJ0qIWDf2qugZ48kbavw8cvJH2Ao7fxGudDpy++WVKkqbBK3IlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SerIUu+9o23I8hPOnen2rj35yJluT9LW45G+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkeWHPpJdkjy1STntPm9k3wlybokH06yU2t/QJtf15Yvn3iNN7T2q5IcNu2dkSTdu8050n8N8I2J+bcB76yqxwI3A8e19uOAm1v7O9t6JNkHOBZ4AnA48O4kO2xZ+ZKkzbGk0E+yJ3Ak8L42H+Ag4CNtlTOAo9v0UW2etvzgtv5RwJlVdUdVfRtYB+w3jZ2QJC3NUo/0/wfwX4GftvlHAj+oqjvb/PXAHm16D+A7AG35LW39u9s38hxJ0gwsGvpJng3cVFUXz6AekqxOsjbJ2vXr189ik5LUjaUc6T8deG6Sa4EzGbp1/iewc5Id2zp7Aje06RuAvQDa8ocD359s38hz7lZVp1bVyqpauWzZss3eIUnSpi0a+lX1hqras6qWM5yI/WxVvRj4HHBMW20V8Ik2fXabpy3/bFVVaz+2je7ZG1gBXDi1PZEkLWrHxVfZpNcDZyY5CfgqcFprPw34YJJ1wAaGDwqq6ookZwFXAncCx1fVXVuwfUnSZtqs0K+qC4AL2vQ1bGT0TVXdDrxgE89/K/DWzS1SkjQdXpErSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRw
x9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOrJo6Cd5YJILk3wtyRVJ/qS1753kK0nWJflwkp1a+wPa/Lq2fPnEa72htV+V5LCttVOSpI1bypH+HcBBVfVkYF/g8CQHAG8D3llVjwVuBo5r6x8H3Nza39nWI8k+wLHAE4DDgXcn2WGaOyNJuneLhn4N/qXN3r/9FXAQ8JHWfgZwdJs+qs3Tlh+cJK39zKq6o6q+DawD9pvKXkiSlmRJffpJdkhyKXATsAb4FvCDqrqzrXI9sEeb3gP4DkBbfgvwyMn2jTxnclurk6xNsnb9+vWbv0eSpE1aUuhX1V1VtS+wJ8PR+eO3VkFVdWpVrayqlcuWLdtam5GkLm3W6J2q+gHwOeDfAjsn2bEt2hO4oU3fAOwF0JY/HPj+ZPtGniNJmoGljN5ZlmTnNv0g4BDgGwzhf0xbbRXwiTZ9dpunLf9sVVVrP7aN7tkbWAFcOK0dkSQtbsfFV2F34Iw20uZ+wFlVdU6SK4Ezk5wEfBU4ra1/GvDBJOuADQwjdqiqK5KcBVwJ3AkcX1V3TXd3JEn3ZtHQr6rLgKdspP0aNjL6pqpuB16widd6K/DWzS9TkjQNXpErSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHVkx7ELkO5p+QnnznR715585Ey3J43JI31J6siioZ9krySfS3JlkiuSvKa1PyLJmiRXt8ddWnuSvCvJuiSXJXnqxGutautfnWTV1tstSdLGLOVI/07gdVW1D3AAcHySfYATgPOragVwfpsHOAJY0f5WA6fA8CEBnAjsD+wHnLjwQSFJmo1FQ7+qbqyqS9r0D4FvAHsARwFntNXOAI5u00cBH6jBl4Gdk+wOHAasqaoNVXUzsAY4fKp7I0m6V5vVp59kOfAU4CvAblV1Y1v0XWC3Nr0H8J2Jp13f2jbVLkmakSWHfpKHAB8F/qiqbp1cVlUF1DQKSrI6ydoka9evXz+Nl5QkNUsK/ST3Zwj8v66qv2vN32vdNrTHm1r7DcBeE0/fs7Vtqv3nVNWpVbWyqlYuW7Zsc/ZFkrSIRcfpJwlwGvCNqnrHxKKzgVXAye3xExPtr0pyJsNJ21uq6sYk5wF/OnHy9lDgDdPZDWn74XUIGtNSLs56OvBS4PIkl7a2P2YI+7OSHAdcB7ywLfsk8CxgHXAb8AqAqtqQ5C3ARW29N1fVhqnshSRpSRYN/ar6ApBNLD54I+sXcPwmXut04PTNKVCSND1ekStJHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjqy49gFSJofy084d6bbu/bkI2e6vXngkb4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR1ZNPSTnJ7kpiRfn2h7RJI1Sa5uj7u09iR5V5J1SS5L8tSJ56xq61+dZNXW2R1J0r1ZypH++4HD79F2AnB+Va0Azm/zAEcAK9rfauAUGD4kgBOB/YH9gBMXPigkSbOzaOhX1eeBDfdoPgo4o02fARw90f6BGnwZ2DnJ7sBhwJqq2lBVNwNr+MUPEknSVnZf+/R3q6ob2/R3gd3a9B7AdybWu761bar9FyRZnWRtkrXr16+/j+VJkjZmi0/kVlUBNYVaFl7v1KpaWVUrly1bNq2XlSRx30P/e63bhvZ4U2u/AdhrYr09W9um2iVJM3RfQ/9sYGEEzirgExPtL2ujeA4AbmndQOcBhybZpZ
3APbS1SZJmaNFbKyf5EPAMYNck1zOMwjkZOCvJccB1wAvb6p8EngWsA24DXgFQVRuSvAW4qK335qq658lhSdJWtmjoV9WLNrHo4I2sW8Dxm3id04HTN6s6SdJUeUWuJHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUkUXvvSNJGiw/4dyZbu/ak4+c+mt6pC9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSMzD/0khye5Ksm6JCfMevuS1LOZhn6SHYC/BI4A9gFelGSfWdYgST2b9ZH+fsC6qrqmqn4MnAkcNeMaJKlbqarZbSw5Bji8qn6/zb8U2L+qXjWxzmpgdZt9HHDVzAqEXYF/nuH2Zs39277N8/7N877B7Pfv0VW1bGMLdpxhEUtSVacCp46x7SRrq2rlGNueBfdv+zbP+zfP+wbb1v7NunvnBmCvifk9W5skaQZmHfoXASuS7J1kJ+BY4OwZ1yBJ3Zpp905V3ZnkVcB5wA7A6VV1xSxrWMQo3Uoz5P5t3+Z5/+Z532Ab2r+ZnsiVJI3LK3IlqSOGviR1pOvQT/KcJF3/N5DUl94D73eBq5P8WZLHj13M1pZklyRPGruOachgr8XXlDSp69CvqpcATwG+Bbw/yT8mWZ3koSOXNjVJLkjysCSPAC4B3pvkHWPXtaVqGIHwybHr2FqS7JDkm2PXsbUleXSS327TD5qz995uSU5L8qk2v0+S48auq+vQB6iqW4GPMNwHaHfgd4BLkvzhqIVNz8PbPj4P+EBV7Q/89sg1TcslSZ42dhFbQ1XdBVyV5FfGrmVrSfIHDO+997SmPYGPj1fR1L2fYXj6o9r8PwF/NFo1Tdehn+S5ST4GXADcH9ivqo4Angy8bszapmjHJLsDLwTOGbuYKdsf+Mck30pyWZLLk1w2dlFTtAtwRZLzk5y98Dd2UVN0PPB04FaAqroa+OVRK5quXavqLOCnMFynBNw1bknb4L13Zuz5wDur6vOTjVV127bwNWxK3sxwtPGFqrooyWOAq0euaVoOG7uArexNYxewld1RVT9OAkCSHYF5unDoR0keSdunJAcAt4xbkhdnkWQ3YKGL4MKqumnMerR5khwIrKiqv0qyDHhIVX177Lq0uCR/BvwAeBnwh8B/BK6sqv82amFTkuSpwF8Avw58HVgGHFNVo34b7Tr0k7wA+HOG7p0Avwn8l6r6yJh1TVN7Y50E/CvwaeBJwGur6n+PWtgUJDkRWAk8rqp+LcmjgL+tqqePXNpUtCPDvwD+DbATw61LflRVDxu1sClpw6WPAw5leP+dB7yv5iiU2reXxzHs31VV9ZORS+o+9L8GHLJwdN+OFP++qp48bmXTk+TSqto3ye8Azwb+E/D5edjHJJcyjL66pKqe0touq6p5GZa6luGmhH/L8OH2MuDXquoNoxY2JUmeB5xbVXeMXcvW0A4qP11VP0zyRuCpwElVdcmYdXV9Ihe43z26c77P/P03WThvcyTDUfDofYpT9ON2VLjQZ/pLI9czdVW1Dtihqu6qqr8CDh+7pil6DvBPST6Y5NntqHievKkF/oHAwcBpwCkj1zR3Abe5Pp3kvCQvT/JyhnHfnxq5pmk7p433/g3g/PZt5vaRa5qWs5K8B9i5Df/7e+C9I9c0Tbe1W5Bf2i4gfC1z9J6tqlcAj2X4JvMi4FtJ3jduVVO1MFLnSOC9VXUuQzfdqLru3oG7v2Iu9AH/Q1XN0zhhANqFWbdU1V3taPihVfXdseuahiSHMNEnXFVrRi5papI8GvgeQ1C8Fng48O529D83ktyf4RvMK4B/X1W7jlzSVCQ5h+FHog5h6Nr5V4bBIqN2rXYZ+km+UFUHJvkhQ9dAJhb/FNgAvL2q3j1KgVOU5MEM/fi/UlWrk6xgOP
E5b2P251KSBzH8283yt6JnIskRDLdCeQbDYIqzgM+08ezbvfbeOxy4vKqubtfLPLGqPjNqXT2G/mLa2NovVdXjxq5lSyX5MHAx8LKq+vX2P+KXqmrfkUvbYhMf2pNuAdYCr6uqa2Zf1fQkeQ7D6LKdqmrvJPsCb66q545c2lQk+RDwYeBT83QyN8nDqurW9g37F1TVhlnXNMnQ34Qku1fVjWPXsaUWfpA5yVcnRrh8beyvmNOQ5C3A9cDfMHxbOxb4VYZ7DL2yqp4xXnVbLsnFwEHABRP/dpdX1RPHrWx65vE6mSTnVNWzk3ybX+xJqKp6zEilAXN0Umja5iHwmx+3LoKFES6/CszLUdVzq+o9VfXDqrq1qk4FDquqDzPcwmB795ONjLaam6O0NqTxQuAFDLcJ+UqSY8atasu1wA/wW1X1mKrae+Jv1MAHb8PQgxMZLsraK8lfM5y0fvmoFU3PbUleyHDTLoBj+NnIpHkIxyuS/Adgh3Yu5tXAl0auaZreCDztntfJ8LN/z+1WVVWSc4Ft7luZR/pzro1meR5D0H8IWFlVF4xZ0xS9GHgpcBPDKJeXAi9p32xeNWZhWyLJB9vkt4AnMHwz+xDDjclGv0vjFM37dTLb5F1g7dPvQJI9gEcz8c3unjeZ07YjyZUMt7/+FPDMey4f+0TgtCR5O8NtQT7Umn4XuKyqXj9eVdPTro95LHAd8COGvv0a+4pxQ3/OJXkbw5vpCtotXhn+x9vuR4C07oA/AJbz8x9ovzdWTdOQ5NXAK4HHMIzzvnsR28CJwGlK8nx+/jqZj41ZzzS16yx+QVVdN+taJhn6cy7JVcCT5mlI3IIkXwL+gWFI6t33Ka+qj45W1BQlOaWqXjl2Hbrv2p02D2Q4x/TFse+7A4b+3Gs/1faCqvqXsWuZtoWbyY1dhzbPJq6vgJ99k5mXu4j+d4aRSX/Xmo5muP/VSeNVZejPvSQfZfglsPOZGKpZVa8eragpSXISw4Vmc/tbudp+tW/ZT66q29v8g4BLx77o0yGb8+/s9jePXgP8cZI7gJ8wZ0eK2u79P+CB/GwY8QP4+XM0o/BIX9u1dqn7CoY3FwBV9X/Hq0gaJPk4w9XGaxi6sw5huBjtehjv27ahP6eSXM69XKA09rCxaUjy+wxH+3sClwIHMHT3HDxqYRKQZNW9La+qM2ZVyyS7d+bXs9vj8e1x4YKflzAfV6vCEPhPA75cVc9M8njgT0euSSLJDsChVfXisWu5J0N/Ti2MBU5yyMLNuprXJ7kEOGGcyqbq9qq6PQlJHlBV30yy3d8ZVdu/9tsVj06yU1X9eOx6Jhn68y9Jnl5VX2wz/475udT9+iQ7Ax8H1iS5meHqR2lbcA3wxSRnM1yRC0BVvWO8kuzTn3tJfgM4neFXlwLcDPzetnCRyDQl+S2Gffz0tnZkpT4lOXFj7VX1J7OuZZKh34kkDweYsx9Gl7SZDP0OJDmS4W6Nk8Ma3zxeRdL8S/I5NjJooqoOGqGcu9mnP+eS/C/gwQx3a3wfwz3nLxy1KKkP/3li+oHA84HRf//XI/05l+SyqnrSxONDGH6T9DfHrk3qTZILq2q/MWvwSH/+LVwCfluSRwEbgN1HrEfqwj1+GP1+wEqGwQajMvTn3/9pwxrfzvCD4QW8d9ySpC5czM9+GP0nwLXAcWMWBPMzXlub9k3grnaP+b8Evswwrl3S1vV6YN+q2pvhivgfAbeNW5Kh34M3VdUPkxwIHMRwMveUkWuSevDGqrp1W3vvGfrzb+EXpY4E3ltV5wI7jViP1Itt8r1n6M+/G5K8h+F3cj+Z5AH47y7Nwjb53nPI5pxL8mDgcODyqro6ye7AE6vqMyOXJs21bfW9Z+hLUkdG/6ohSZodQ1+SOmLoS1JHDH1J6oihL0kd+f/K/NV+jg5JdwAAAABJRU5ErkJggg==\\n\",\n            \"text/plain\": [\n              
\"<Figure size 432x288 with 1 Axes>\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": [],\n            \"needs_background\": \"light\"\n          }\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"EaKp3E1T8kkm\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"7b0fa7d2-199e-4e6e-b895-1d216a1be7b8\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 195\n        }\n      },\n      \"source\": [\n        \"train.head()\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/html\": [\n              \"<div>\\n\",\n              \"<style scoped>\\n\",\n              \"    .dataframe tbody tr th:only-of-type {\\n\",\n              \"        vertical-align: middle;\\n\",\n              \"    }\\n\",\n              \"\\n\",\n              \"    .dataframe tbody tr th {\\n\",\n              \"        vertical-align: top;\\n\",\n              \"    }\\n\",\n              \"\\n\",\n              \"    .dataframe thead th {\\n\",\n              \"        text-align: right;\\n\",\n              \"    }\\n\",\n              \"</style>\\n\",\n              \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n              \"  <thead>\\n\",\n              \"    <tr style=\\\"text-align: right;\\\">\\n\",\n              \"      <th></th>\\n\",\n              \"      <th>text</th>\\n\",\n              \"      <th>emotion</th>\\n\",\n              \"    </tr>\\n\",\n              \"  </thead>\\n\",\n              \"  <tbody>\\n\",\n              \"    <tr>\\n\",\n              \"      <th>0</th>\\n\",\n              \"      <td>i didnt feel humiliated</td>\\n\",\n              \"      <td>sadness</td>\\n\",\n              \"    </tr>\\n\",\n              \"    <tr>\\n\",\n              \"      <th>1</th>\\n\",\n         
     \"      <td>i can go from feeling so hopeless to so damned...</td>\\n\",\n              \"      <td>sadness</td>\\n\",\n              \"    </tr>\\n\",\n              \"    <tr>\\n\",\n              \"      <th>2</th>\\n\",\n              \"      <td>im grabbing a minute to post i feel greedy wrong</td>\\n\",\n              \"      <td>anger</td>\\n\",\n              \"    </tr>\\n\",\n              \"    <tr>\\n\",\n              \"      <th>3</th>\\n\",\n              \"      <td>i am ever feeling nostalgic about the fireplac...</td>\\n\",\n              \"      <td>love</td>\\n\",\n              \"    </tr>\\n\",\n              \"    <tr>\\n\",\n              \"      <th>4</th>\\n\",\n              \"      <td>i am feeling grouchy</td>\\n\",\n              \"      <td>anger</td>\\n\",\n              \"    </tr>\\n\",\n              \"  </tbody>\\n\",\n              \"</table>\\n\",\n              \"</div>\"\n            ],\n            \"text/plain\": [\n              \"                                                text  emotion\\n\",\n              \"0                            i didnt feel humiliated  sadness\\n\",\n              \"1  i can go from feeling so hopeless to so damned...  sadness\\n\",\n              \"2   im grabbing a minute to post i feel greedy wrong    anger\\n\",\n              \"3  i am ever feeling nostalgic about the fireplac...     
love\\n\",\n              \"4                               i am feeling grouchy    anger\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 50\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"i-Gt1WyPBL-6\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"5ca664c8-5a05-4e8c-a15b-b66891f3e164\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 67\n        }\n      },\n      \"source\": [\n        \"train.count()\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"text       16000\\n\",\n              \"emotion    16000\\n\",\n              \"dtype: int64\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 51\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"KybpXVl1Die5\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"1319d2b5-c84e-4c95-bae6-3af745326439\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 186,\n          \"referenced_widgets\": [\n            \"0037bb8409bb4d65ac4ebd956fd1e631\",\n            \"db528e3117024014b4d281b650901cbd\",\n            \"350fc08aa59849fc9fd3f3e454583a6c\",\n            \"be936dd408314d0d90a22f627ca517ca\",\n            \"99f56e1a8fdb4b2282fa6e17819d044e\",\n            \"462bd815ddbc4687bcf7695f59919f0c\",\n            \"40edb7d92c1145ee9e3bb823e4688e16\",\n            \"f827cd8a6bf846c590913c5ea40e6737\"\n          ]\n        }\n      },\n      \"source\": [\n        \"tokenizer = T5Tokenizer.from_pretrained('t5-base')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n      
  {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:filelock:Lock 139955425093728 acquired on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\\n\",\n            \"INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpsnkx0l2g\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"0037bb8409bb4d65ac4ebd956fd1e631\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model in cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"INFO:filelock:Lock 139955425093728 released on 
/root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\\n\",\n            \"INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"cANrUEXhO8QY\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"### Dataset\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"8GsMQdqMPCN7\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Here also we will process the examples in the same way we did above. 
If the label is 'love' we will ask the model to predict the text 'love'\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"AKh6m92eKZc4\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Lets check how t5 encodes the following labels\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"HDnMp5-fDIAc\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"837d1d28-2d17-4ff0-f345-64eed6949dbb\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 118\n        }\n      },\n      \"source\": [\n        \"emotions = [ \\\"sadness\\\", \\\"joy\\\", \\\"love\\\", \\\"anger\\\", \\\"fear\\\", \\\"surprise\\\"]\\n\",\n        \"for em in emotions:\\n\",\n        \"  print(len(tokenizer.encode(em)))\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"1\\n\",\n            \"1\\n\",\n            \"1\\n\",\n            \"1\\n\",\n            \"1\\n\",\n            \"1\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"i8VIZIWFOwMj\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Here also all the labels are encoded as single ids\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"8i8QD-3MDrWq\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"class EmotionDataset(Dataset):\\n\",\n        \"  def __init__(self, tokenizer, data_dir, type_path,  max_len=512):\\n\",\n        \"    self.path = os.path.join(data_dir, type_path + '.txt')\\n\",\n        \"\\n\",\n        \"    self.data_column = \\\"text\\\"\\n\",\n        \"    self.class_column = 
\\\"emotion\\\"\\n\",\n        \"    self.data = pd.read_csv(self.path, sep=\\\";\\\", header=None, names=[self.data_column, self.class_column],\\n\",\n        \"                            engine=\\\"python\\\")\\n\",\n        \"    \\n\",\n        \"    self.max_len = max_len\\n\",\n        \"    self.tokenizer = tokenizer\\n\",\n        \"    self.inputs = []\\n\",\n        \"    self.targets = []\\n\",\n        \"\\n\",\n        \"    self._build()\\n\",\n        \"  \\n\",\n        \"  def __len__(self):\\n\",\n        \"    return len(self.inputs)\\n\",\n        \"  \\n\",\n        \"  def __getitem__(self, index):\\n\",\n        \"    source_ids = self.inputs[index][\\\"input_ids\\\"].squeeze()\\n\",\n        \"    target_ids = self.targets[index][\\\"input_ids\\\"].squeeze()\\n\",\n        \"\\n\",\n        \"    src_mask    = self.inputs[index][\\\"attention_mask\\\"].squeeze()  # might need to squeeze\\n\",\n        \"    target_mask = self.targets[index][\\\"attention_mask\\\"].squeeze()  # might need to squeeze\\n\",\n        \"\\n\",\n        \"    return {\\\"source_ids\\\": source_ids, \\\"source_mask\\\": src_mask, \\\"target_ids\\\": target_ids, \\\"target_mask\\\": target_mask}\\n\",\n        \"  \\n\",\n        \"  def _build(self):\\n\",\n        \"    for idx in range(len(self.data)):\\n\",\n        \"      input_, target = self.data.loc[idx, self.data_column], self.data.loc[idx, self.class_column]      \\n\",\n        \"      \\n\",\n        \"      input_ = input_ + ' </s>'\\n\",\n        \"      target = target + \\\" </s>\\\"\\n\",\n        \"\\n\",\n        \"       # tokenize inputs\\n\",\n        \"      tokenized_inputs = self.tokenizer.batch_encode_plus(\\n\",\n        \"          [input_], max_length=self.max_len, pad_to_max_length=True, return_tensors=\\\"pt\\\"\\n\",\n        \"      )\\n\",\n        \"       # tokenize targets\\n\",\n        \"      tokenized_targets = self.tokenizer.batch_encode_plus(\\n\",\n        \"          
[target], max_length=2, pad_to_max_length=True, return_tensors=\\\"pt\\\"\\n\",\n        \"      )\\n\",\n        \"\\n\",\n        \"      self.inputs.append(tokenized_inputs)\\n\",\n        \"      self.targets.append(tokenized_targets)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"kRz5jyl3FBkv\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"b3587087-efa7-400b-f3f4-ebc958deb33d\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"source\": [\n        \"dataset = EmotionDataset(tokenizer, 'emotion_data', 'val', 512)\\n\",\n        \"len(dataset)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"2000\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 54\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"jxT6QzUAFQN0\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"68122a3a-bf3e-4125-f768-a6410abed5a9\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 50\n        }\n      },\n      \"source\": [\n        \"data = dataset[42]\\n\",\n        \"print(tokenizer.decode(data['source_ids']))\\n\",\n        \"print(tokenizer.decode(data['target_ids']))\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"i honestly don't feel discouraged today as i usually do\\n\",\n            \"sadness\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n      
  \"id\": \"PBVHtdIuFpID\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"DEWi6c-pGZV9\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"### Train\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"wGrpDJnLPQ0Q\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"As I said above there's no need to change the model or add task specific head or any other hyperparameters, we'll just change the dataset and that's it!\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"kDep-uIcGYX2\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"!mkdir -p t5_emotion\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"TgNOy7a4LJ9h\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"3945df44-55d0-40d2-d98c-fa196bb9d554\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 54\n        }\n      },\n      \"source\": [\n        \"args_dict.update({'data_dir': 'emotion_data', 'output_dir': 't5_emotion', 'num_train_epochs':2})\\n\",\n        \"args = argparse.Namespace(**args_dict)\\n\",\n        \"print(args_dict)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"{'data_dir': 'emotion_data', 'output_dir': 't5_emotion', 'model_name_or_path': 't5-base', 'tokenizer_name_or_path': 't5-base', 'max_seq_length': 512, 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_epsilon': 1e-08, 'warmup_steps': 0, 
'train_batch_size': 8, 'eval_batch_size': 8, 'num_train_epochs': 2, 'gradient_accumulation_steps': 16, 'n_gpu': 1, 'early_stop_callback': False, 'fp_16': False, 'opt_level': 'O1', 'max_grad_norm': 1.0, 'seed': 42}\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"at783kr7KvS4\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"checkpoint_callback = pl.callbacks.ModelCheckpoint(\\n\",\n        \"    filepath=args.output_dir, prefix=\\\"checkpoint\\\", monitor=\\\"val_loss\\\", mode=\\\"min\\\", save_top_k=5\\n\",\n        \")\\n\",\n        \"\\n\",\n        \"train_params = dict(\\n\",\n        \"    accumulate_grad_batches=args.gradient_accumulation_steps,\\n\",\n        \"    gpus=args.n_gpu,\\n\",\n        \"    max_epochs=args.num_train_epochs,\\n\",\n        \"    early_stop_callback=False,\\n\",\n        \"    precision= 16 if args.fp_16 else 32,\\n\",\n        \"    amp_level=args.opt_level,\\n\",\n        \"    gradient_clip_val=args.max_grad_norm,\\n\",\n        \"    checkpoint_callback=checkpoint_callback,\\n\",\n        \"    callbacks=[LoggingCallback()],\\n\",\n        \")\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"1LBvpP01KvTA\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"def get_dataset(tokenizer, type_path, args):\\n\",\n        \"  return EmotionDataset(tokenizer=tokenizer, data_dir=args.data_dir, type_path=type_path,  max_len=args.max_seq_length)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"v3Tty_OHGlvR\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"0423fedb-7a93-4990-c6ce-545b52b86e63\",\n        
\"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 978\n        }\n      },\n      \"source\": [\n        \"model = T5FineTuner(args)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"INFO:transformers.configuration_utils:Model config T5Config {\\n\",\n            \"  \\\"architectures\\\": [\\n\",\n            \"    \\\"T5WithLMHeadModel\\\"\\n\",\n            \"  ],\\n\",\n            \"  \\\"d_ff\\\": 3072,\\n\",\n            \"  \\\"d_kv\\\": 64,\\n\",\n            \"  \\\"d_model\\\": 768,\\n\",\n            \"  \\\"decoder_start_token_id\\\": 0,\\n\",\n            \"  \\\"dropout_rate\\\": 0.1,\\n\",\n            \"  \\\"eos_token_id\\\": 1,\\n\",\n            \"  \\\"initializer_factor\\\": 1.0,\\n\",\n            \"  \\\"is_encoder_decoder\\\": true,\\n\",\n            \"  \\\"layer_norm_epsilon\\\": 1e-06,\\n\",\n            \"  \\\"model_type\\\": \\\"t5\\\",\\n\",\n            \"  \\\"n_positions\\\": 512,\\n\",\n            \"  \\\"num_heads\\\": 12,\\n\",\n            \"  \\\"num_layers\\\": 12,\\n\",\n            \"  \\\"output_past\\\": true,\\n\",\n            \"  \\\"pad_token_id\\\": 0,\\n\",\n            \"  \\\"relative_attention_num_buckets\\\": 32,\\n\",\n            \"  \\\"task_specific_params\\\": {\\n\",\n            \"    \\\"summarization\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"length_penalty\\\": 2.0,\\n\",\n            \"      \\\"max_length\\\": 200,\\n\",\n            \"      \\\"min_length\\\": 30,\\n\",\n      
      \"      \\\"no_repeat_ngram_size\\\": 3,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"summarize: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_de\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to German: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_fr\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to French: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_ro\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to Romanian: \\\"\\n\",\n            \"    }\\n\",\n            \"  },\\n\",\n            \"  \\\"vocab_size\\\": 32128\\n\",\n            \"}\\n\",\n            \"\\n\",\n            \"INFO:transformers.modeling_utils:loading weights file https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"INFO:transformers.modeling_utils:Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\\n\",\n            \"INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at 
/root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"mIsW9pwEG27D\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"d0469592-9403-4397-c8cf-b2b4c48ba614\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 50\n        }\n      },\n      \"source\": [\n        \"trainer = pl.Trainer(**train_params)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:lightning:GPU available: True, used: True\\n\",\n            \"INFO:lightning:CUDA_VISIBLE_DEVICES: [0]\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"xmk4GsEMHTfZ\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"ba492b59-fc67-4fd3-d42a-5965600679df\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 1000,\n          \"referenced_widgets\": [\n            \"61d58772a6a64c5c8ad30dab2563a56f\",\n            \"4000e73e6d804763986dc9a9c74456aa\",\n            \"0dd99276ab294c939d83320f4674d5c2\",\n            \"d306f7ff1ec94561aeed9ff59ba9b54b\",\n            \"0893a9730450433fa76a74b008a6f482\",\n            \"f8873c7201e1410cb0ec52cb7e34c3c9\",\n            \"234eb8b041c44358b2f993c2853162f7\",\n            \"8f73da698e85474fbecfd91bb7770c56\",\n            \"26a0cb124049417aa9dbdd010e3af03a\",\n            \"8a14bd8f2a424b15b48426fd5e320678\",\n            \"09ed6242c5ef4a4791a1074ff7e4616e\",\n            \"487a6ea92fe0463ebbcb63094fde5136\",\n            \"c050be8414044acdb1a496495d148302\",\n            
\"56a67d534f284df0bc1121f1e264f5e2\",\n            \"f168c4ae2d014e89bacc58e43427302e\",\n            \"5cabe7d5ed6b46be882c558d28a29ca2\",\n            \"1681a9ce7f9340caa50c4204777a6f9e\",\n            \"a9f0c66f958e493286155c8d2631d255\",\n            \"e04d6312d5d4425ab726588c485e668c\",\n            \"fab8ee7d5d3940819eb9131efbbad791\",\n            \"6dd2781f88eb4549b4203dfec9c1a98e\",\n            \"893ba880ac6545baa6eb4a532ecc5753\",\n            \"d4fc7ae628c94a758ce694318bc620ba\",\n            \"4c33ca548b5e4738abdac09575e2a325\",\n            \"ff475d6cdc074c14aa7b2cfede771b07\",\n            \"d77faf8b9ea6480abe594114823ca52f\",\n            \"ee4f41b591fe41a5a2d915c343b16c1d\",\n            \"d8946214acc44c4cb97688538daaa33f\",\n            \"9b9306452732495cbb1acd3e2fcf3b69\",\n            \"f42e9e596ad0485b842fee92d1884750\",\n            \"1d9f8718ba4d4b60997757ea7f1db72b\",\n            \"63db466ae63b42a5a79d051ef5af653e\"\n          ]\n        }\n      },\n      \"source\": [\n        \"trainer.fit(model)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:lightning:\\n\",\n            \"    | Name                                                                  | Type                       | Params\\n\",\n            \"-----------------------------------------------------------------------------------------------------------------\\n\",\n            \"0   | model                                                                 | T5ForConditionalGeneration | 222 M \\n\",\n            \"1   | model.shared                                                          | Embedding                  | 24 M  \\n\",\n            \"2   | model.encoder                                                         | T5Stack                    | 109 M \\n\",\n            \"3   | model.encoder.block                                                   | ModuleList      
           | 84 M  \\n\",\n            \"4   | model.encoder.block.0                                                 | T5Block                    | 7 M   \\n\",\n            \"5   | model.encoder.block.0.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"6   | model.encoder.block.0.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"7   | model.encoder.block.0.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"8   | model.encoder.block.0.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"9   | model.encoder.block.0.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"10  | model.encoder.block.0.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"11  | model.encoder.block.0.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"12  | model.encoder.block.0.layer.0.SelfAttention.relative_attention_bias   | Embedding                  | 384   \\n\",\n            \"13  | model.encoder.block.0.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"14  | model.encoder.block.0.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"15  | model.encoder.block.0.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"16  | model.encoder.block.0.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"17  | model.encoder.block.0.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"18  | model.encoder.block.0.layer.1.DenseReluDense.wo                
       | Linear                     | 2 M   \\n\",\n            \"19  | model.encoder.block.0.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"20  | model.encoder.block.0.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"21  | model.encoder.block.0.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"22  | model.encoder.block.1                                                 | T5Block                    | 7 M   \\n\",\n            \"23  | model.encoder.block.1.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"24  | model.encoder.block.1.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"25  | model.encoder.block.1.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"26  | model.encoder.block.1.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"27  | model.encoder.block.1.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"28  | model.encoder.block.1.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"29  | model.encoder.block.1.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"30  | model.encoder.block.1.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"31  | model.encoder.block.1.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"32  | model.encoder.block.1.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"33  | 
model.encoder.block.1.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"34  | model.encoder.block.1.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"35  | model.encoder.block.1.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"36  | model.encoder.block.1.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"37  | model.encoder.block.1.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"38  | model.encoder.block.1.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"39  | model.encoder.block.2                                                 | T5Block                    | 7 M   \\n\",\n            \"40  | model.encoder.block.2.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"41  | model.encoder.block.2.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"42  | model.encoder.block.2.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"43  | model.encoder.block.2.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"44  | model.encoder.block.2.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"45  | model.encoder.block.2.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"46  | model.encoder.block.2.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"47  | model.encoder.block.2.layer.0.layer_norm                              | T5LayerNorm                | 768   
\\n\",\n            \"48  | model.encoder.block.2.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"49  | model.encoder.block.2.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"50  | model.encoder.block.2.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"51  | model.encoder.block.2.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"52  | model.encoder.block.2.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"53  | model.encoder.block.2.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"54  | model.encoder.block.2.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"55  | model.encoder.block.2.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"56  | model.encoder.block.3                                                 | T5Block                    | 7 M   \\n\",\n            \"57  | model.encoder.block.3.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"58  | model.encoder.block.3.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"59  | model.encoder.block.3.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"60  | model.encoder.block.3.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"61  | model.encoder.block.3.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"62  | model.encoder.block.3.layer.0.SelfAttention.v                         | Linear    
                 | 589 K \\n\",\n            \"63  | model.encoder.block.3.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"64  | model.encoder.block.3.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"65  | model.encoder.block.3.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"66  | model.encoder.block.3.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"67  | model.encoder.block.3.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"68  | model.encoder.block.3.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"69  | model.encoder.block.3.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"70  | model.encoder.block.3.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"71  | model.encoder.block.3.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"72  | model.encoder.block.3.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"73  | model.encoder.block.4                                                 | T5Block                    | 7 M   \\n\",\n            \"74  | model.encoder.block.4.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"75  | model.encoder.block.4.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"76  | model.encoder.block.4.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"77  | model.encoder.block.4.layer.0.SelfAttention.q            
             | Linear                     | 589 K \\n\",\n            \"78  | model.encoder.block.4.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"79  | model.encoder.block.4.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"80  | model.encoder.block.4.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"81  | model.encoder.block.4.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"82  | model.encoder.block.4.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"83  | model.encoder.block.4.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"84  | model.encoder.block.4.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"85  | model.encoder.block.4.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"86  | model.encoder.block.4.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"87  | model.encoder.block.4.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"88  | model.encoder.block.4.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"89  | model.encoder.block.4.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"90  | model.encoder.block.5                                                 | T5Block                    | 7 M   \\n\",\n            \"91  | model.encoder.block.5.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"92  | model.encoder.block.5.layer.0   
                                      | T5LayerSelfAttention       | 2 M   \\n\",\n            \"93  | model.encoder.block.5.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"94  | model.encoder.block.5.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"95  | model.encoder.block.5.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"96  | model.encoder.block.5.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"97  | model.encoder.block.5.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"98  | model.encoder.block.5.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"99  | model.encoder.block.5.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"100 | model.encoder.block.5.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"101 | model.encoder.block.5.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"102 | model.encoder.block.5.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"103 | model.encoder.block.5.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"104 | model.encoder.block.5.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"105 | model.encoder.block.5.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"106 | model.encoder.block.5.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"107 | 
model.encoder.block.6                                                 | T5Block                    | 7 M   \\n\",\n            \"108 | model.encoder.block.6.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"109 | model.encoder.block.6.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"110 | model.encoder.block.6.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"111 | model.encoder.block.6.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"112 | model.encoder.block.6.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"113 | model.encoder.block.6.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"114 | model.encoder.block.6.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"115 | model.encoder.block.6.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"116 | model.encoder.block.6.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"117 | model.encoder.block.6.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"118 | model.encoder.block.6.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"119 | model.encoder.block.6.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"120 | model.encoder.block.6.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"121 | model.encoder.block.6.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     
\\n\",\n            \"122 | model.encoder.block.6.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"123 | model.encoder.block.6.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"124 | model.encoder.block.7                                                 | T5Block                    | 7 M   \\n\",\n            \"125 | model.encoder.block.7.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"126 | model.encoder.block.7.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"127 | model.encoder.block.7.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"128 | model.encoder.block.7.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"129 | model.encoder.block.7.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"130 | model.encoder.block.7.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"131 | model.encoder.block.7.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"132 | model.encoder.block.7.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"133 | model.encoder.block.7.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"134 | model.encoder.block.7.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"135 | model.encoder.block.7.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"136 | model.encoder.block.7.layer.1.DenseReluDense.wi                       | Linear    
                 | 2 M   \\n\",\n            \"137 | model.encoder.block.7.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"138 | model.encoder.block.7.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"139 | model.encoder.block.7.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"140 | model.encoder.block.7.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"141 | model.encoder.block.8                                                 | T5Block                    | 7 M   \\n\",\n            \"142 | model.encoder.block.8.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"143 | model.encoder.block.8.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"144 | model.encoder.block.8.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"145 | model.encoder.block.8.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"146 | model.encoder.block.8.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"147 | model.encoder.block.8.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"148 | model.encoder.block.8.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"149 | model.encoder.block.8.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"150 | model.encoder.block.8.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"151 | model.encoder.block.8.layer.1                            
             | T5LayerFF                  | 4 M   \\n\",\n            \"152 | model.encoder.block.8.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"153 | model.encoder.block.8.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"154 | model.encoder.block.8.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"155 | model.encoder.block.8.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"156 | model.encoder.block.8.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"157 | model.encoder.block.8.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"158 | model.encoder.block.9                                                 | T5Block                    | 7 M   \\n\",\n            \"159 | model.encoder.block.9.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"160 | model.encoder.block.9.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"161 | model.encoder.block.9.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"162 | model.encoder.block.9.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"163 | model.encoder.block.9.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"164 | model.encoder.block.9.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"165 | model.encoder.block.9.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"166 | 
model.encoder.block.9.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"167 | model.encoder.block.9.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"168 | model.encoder.block.9.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"169 | model.encoder.block.9.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"170 | model.encoder.block.9.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"171 | model.encoder.block.9.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"172 | model.encoder.block.9.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"173 | model.encoder.block.9.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"174 | model.encoder.block.9.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"175 | model.encoder.block.10                                                | T5Block                    | 7 M   \\n\",\n            \"176 | model.encoder.block.10.layer                                          | ModuleList                 | 7 M   \\n\",\n            \"177 | model.encoder.block.10.layer.0                                        | T5LayerSelfAttention       | 2 M   \\n\",\n            \"178 | model.encoder.block.10.layer.0.SelfAttention                          | T5Attention                | 2 M   \\n\",\n            \"179 | model.encoder.block.10.layer.0.SelfAttention.q                        | Linear                     | 589 K \\n\",\n            \"180 | model.encoder.block.10.layer.0.SelfAttention.k                        | Linear                     | 589 K 
\\n\",\n            \"181 | model.encoder.block.10.layer.0.SelfAttention.v                        | Linear                     | 589 K \\n\",\n            \"182 | model.encoder.block.10.layer.0.SelfAttention.o                        | Linear                     | 589 K \\n\",\n            \"183 | model.encoder.block.10.layer.0.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"184 | model.encoder.block.10.layer.0.dropout                                | Dropout                    | 0     \\n\",\n            \"185 | model.encoder.block.10.layer.1                                        | T5LayerFF                  | 4 M   \\n\",\n            \"186 | model.encoder.block.10.layer.1.DenseReluDense                         | T5DenseReluDense           | 4 M   \\n\",\n            \"187 | model.encoder.block.10.layer.1.DenseReluDense.wi                      | Linear                     | 2 M   \\n\",\n            \"188 | model.encoder.block.10.layer.1.DenseReluDense.wo                      | Linear                     | 2 M   \\n\",\n            \"189 | model.encoder.block.10.layer.1.DenseReluDense.dropout                 | Dropout                    | 0     \\n\",\n            \"190 | model.encoder.block.10.layer.1.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"191 | model.encoder.block.10.layer.1.dropout                                | Dropout                    | 0     \\n\",\n            \"192 | model.encoder.block.11                                                | T5Block                    | 7 M   \\n\",\n            \"193 | model.encoder.block.11.layer                                          | ModuleList                 | 7 M   \\n\",\n            \"194 | model.encoder.block.11.layer.0                                        | T5LayerSelfAttention       | 2 M   \\n\",\n            \"195 | model.encoder.block.11.layer.0.SelfAttention                          | 
T5Attention                | 2 M   \\n\",\n            \"196 | model.encoder.block.11.layer.0.SelfAttention.q                        | Linear                     | 589 K \\n\",\n            \"197 | model.encoder.block.11.layer.0.SelfAttention.k                        | Linear                     | 589 K \\n\",\n            \"198 | model.encoder.block.11.layer.0.SelfAttention.v                        | Linear                     | 589 K \\n\",\n            \"199 | model.encoder.block.11.layer.0.SelfAttention.o                        | Linear                     | 589 K \\n\",\n            \"200 | model.encoder.block.11.layer.0.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"201 | model.encoder.block.11.layer.0.dropout                                | Dropout                    | 0     \\n\",\n            \"202 | model.encoder.block.11.layer.1                                        | T5LayerFF                  | 4 M   \\n\",\n            \"203 | model.encoder.block.11.layer.1.DenseReluDense                         | T5DenseReluDense           | 4 M   \\n\",\n            \"204 | model.encoder.block.11.layer.1.DenseReluDense.wi                      | Linear                     | 2 M   \\n\",\n            \"205 | model.encoder.block.11.layer.1.DenseReluDense.wo                      | Linear                     | 2 M   \\n\",\n            \"206 | model.encoder.block.11.layer.1.DenseReluDense.dropout                 | Dropout                    | 0     \\n\",\n            \"207 | model.encoder.block.11.layer.1.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"208 | model.encoder.block.11.layer.1.dropout                                | Dropout                    | 0     \\n\",\n            \"209 | model.encoder.final_layer_norm                                        | T5LayerNorm                | 768   \\n\",\n            \"210 | model.encoder.dropout                          
                       | Dropout                    | 0     \\n\",\n            \"211 | model.decoder                                                         | T5Stack                    | 137 M \\n\",\n            \"212 | model.decoder.block                                                   | ModuleList                 | 113 M \\n\",\n            \"213 | model.decoder.block.0                                                 | T5Block                    | 9 M   \\n\",\n            \"214 | model.decoder.block.0.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"215 | model.decoder.block.0.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"216 | model.decoder.block.0.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"217 | model.decoder.block.0.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"218 | model.decoder.block.0.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"219 | model.decoder.block.0.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"220 | model.decoder.block.0.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"221 | model.decoder.block.0.layer.0.SelfAttention.relative_attention_bias   | Embedding                  | 384   \\n\",\n            \"222 | model.decoder.block.0.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"223 | model.decoder.block.0.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"224 | model.decoder.block.0.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"225 | 
model.decoder.block.0.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"226 | model.decoder.block.0.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"227 | model.decoder.block.0.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"228 | model.decoder.block.0.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"229 | model.decoder.block.0.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"230 | model.decoder.block.0.layer.1.EncDecAttention.relative_attention_bias | Embedding                  | 384   \\n\",\n            \"231 | model.decoder.block.0.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"232 | model.decoder.block.0.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"233 | model.decoder.block.0.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"234 | model.decoder.block.0.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"235 | model.decoder.block.0.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"236 | model.decoder.block.0.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"237 | model.decoder.block.0.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"238 | model.decoder.block.0.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"239 | model.decoder.block.0.layer.2.dropout                                 | Dropout                    | 0     
\\n\",\n            \"240 | model.decoder.block.1                                                 | T5Block                    | 9 M   \\n\",\n            \"241 | model.decoder.block.1.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"242 | model.decoder.block.1.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"243 | model.decoder.block.1.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"244 | model.decoder.block.1.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"245 | model.decoder.block.1.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"246 | model.decoder.block.1.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"247 | model.decoder.block.1.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"248 | model.decoder.block.1.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"249 | model.decoder.block.1.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"250 | model.decoder.block.1.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"251 | model.decoder.block.1.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"252 | model.decoder.block.1.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"253 | model.decoder.block.1.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"254 | model.decoder.block.1.layer.1.EncDecAttention.v                       | Linear    
                 | 589 K \\n\",\n            \"255 | model.decoder.block.1.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"256 | model.decoder.block.1.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"257 | model.decoder.block.1.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"258 | model.decoder.block.1.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"259 | model.decoder.block.1.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"260 | model.decoder.block.1.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"261 | model.decoder.block.1.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"262 | model.decoder.block.1.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"263 | model.decoder.block.1.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"264 | model.decoder.block.1.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"265 | model.decoder.block.2                                                 | T5Block                    | 9 M   \\n\",\n            \"266 | model.decoder.block.2.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"267 | model.decoder.block.2.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"268 | model.decoder.block.2.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"269 | model.decoder.block.2.layer.0.SelfAttention.q            
             | Linear                     | 589 K \\n\",\n            \"270 | model.decoder.block.2.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"271 | model.decoder.block.2.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"272 | model.decoder.block.2.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"273 | model.decoder.block.2.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"274 | model.decoder.block.2.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"275 | model.decoder.block.2.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"276 | model.decoder.block.2.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"277 | model.decoder.block.2.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"278 | model.decoder.block.2.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"279 | model.decoder.block.2.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"280 | model.decoder.block.2.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"281 | model.decoder.block.2.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"282 | model.decoder.block.2.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"283 | model.decoder.block.2.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"284 | 
model.decoder.block.2.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"285 | model.decoder.block.2.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"286 | model.decoder.block.2.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"287 | model.decoder.block.2.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"288 | model.decoder.block.2.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"289 | model.decoder.block.2.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"290 | model.decoder.block.3                                                 | T5Block                    | 9 M   \\n\",\n            \"291 | model.decoder.block.3.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"292 | model.decoder.block.3.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"293 | model.decoder.block.3.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"294 | model.decoder.block.3.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"295 | model.decoder.block.3.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"296 | model.decoder.block.3.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"297 | model.decoder.block.3.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"298 | model.decoder.block.3.layer.0.layer_norm                              | T5LayerNorm                | 768   
\\n\",\n            \"299 | model.decoder.block.3.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"300 | model.decoder.block.3.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"301 | model.decoder.block.3.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"302 | model.decoder.block.3.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"303 | model.decoder.block.3.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"304 | model.decoder.block.3.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"305 | model.decoder.block.3.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"306 | model.decoder.block.3.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"307 | model.decoder.block.3.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"308 | model.decoder.block.3.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"309 | model.decoder.block.3.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"310 | model.decoder.block.3.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"311 | model.decoder.block.3.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"312 | model.decoder.block.3.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"313 | model.decoder.block.3.layer.2.layer_norm                              | 
T5LayerNorm                | 768   \\n\",\n            \"314 | model.decoder.block.3.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"315 | model.decoder.block.4                                                 | T5Block                    | 9 M   \\n\",\n            \"316 | model.decoder.block.4.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"317 | model.decoder.block.4.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"318 | model.decoder.block.4.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"319 | model.decoder.block.4.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"320 | model.decoder.block.4.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"321 | model.decoder.block.4.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"322 | model.decoder.block.4.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"323 | model.decoder.block.4.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"324 | model.decoder.block.4.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"325 | model.decoder.block.4.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"326 | model.decoder.block.4.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"327 | model.decoder.block.4.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"328 | 
model.decoder.block.4.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"329 | model.decoder.block.4.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"330 | model.decoder.block.4.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"331 | model.decoder.block.4.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"332 | model.decoder.block.4.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"333 | model.decoder.block.4.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"334 | model.decoder.block.4.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"335 | model.decoder.block.4.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"336 | model.decoder.block.4.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"337 | model.decoder.block.4.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"338 | model.decoder.block.4.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"339 | model.decoder.block.4.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"340 | model.decoder.block.5                                                 | T5Block                    | 9 M   \\n\",\n            \"341 | model.decoder.block.5.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"342 | model.decoder.block.5.layer.0                                         | T5LayerSelfAttention       | 2 M   
\\n\",\n            \"343 | model.decoder.block.5.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"344 | model.decoder.block.5.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"345 | model.decoder.block.5.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"346 | model.decoder.block.5.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"347 | model.decoder.block.5.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"348 | model.decoder.block.5.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"349 | model.decoder.block.5.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"350 | model.decoder.block.5.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"351 | model.decoder.block.5.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"352 | model.decoder.block.5.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"353 | model.decoder.block.5.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"354 | model.decoder.block.5.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"355 | model.decoder.block.5.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"356 | model.decoder.block.5.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"357 | model.decoder.block.5.layer.1.dropout                                 | Dropout   
                 | 0     \\n\",\n            \"358 | model.decoder.block.5.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"359 | model.decoder.block.5.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"360 | model.decoder.block.5.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"361 | model.decoder.block.5.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"362 | model.decoder.block.5.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"363 | model.decoder.block.5.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"364 | model.decoder.block.5.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"365 | model.decoder.block.6                                                 | T5Block                    | 9 M   \\n\",\n            \"366 | model.decoder.block.6.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"367 | model.decoder.block.6.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"368 | model.decoder.block.6.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"369 | model.decoder.block.6.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"370 | model.decoder.block.6.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"371 | model.decoder.block.6.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"372 | model.decoder.block.6.layer.0.SelfAttention.o            
             | Linear                     | 589 K \\n\",\n            \"373 | model.decoder.block.6.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"374 | model.decoder.block.6.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"375 | model.decoder.block.6.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"376 | model.decoder.block.6.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"377 | model.decoder.block.6.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"378 | model.decoder.block.6.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"379 | model.decoder.block.6.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"380 | model.decoder.block.6.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"381 | model.decoder.block.6.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"382 | model.decoder.block.6.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"383 | model.decoder.block.6.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"384 | model.decoder.block.6.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"385 | model.decoder.block.6.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"386 | model.decoder.block.6.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"387 | 
model.decoder.block.6.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"388 | model.decoder.block.6.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"389 | model.decoder.block.6.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"390 | model.decoder.block.7                                                 | T5Block                    | 9 M   \\n\",\n            \"391 | model.decoder.block.7.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"392 | model.decoder.block.7.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"393 | model.decoder.block.7.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"394 | model.decoder.block.7.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"395 | model.decoder.block.7.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"396 | model.decoder.block.7.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"397 | model.decoder.block.7.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"398 | model.decoder.block.7.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"399 | model.decoder.block.7.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"400 | model.decoder.block.7.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"401 | model.decoder.block.7.layer.1.EncDecAttention                         | T5Attention                | 2 M   
\\n\",\n            \"402 | model.decoder.block.7.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"403 | model.decoder.block.7.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"404 | model.decoder.block.7.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"405 | model.decoder.block.7.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"406 | model.decoder.block.7.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"407 | model.decoder.block.7.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"408 | model.decoder.block.7.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"409 | model.decoder.block.7.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"410 | model.decoder.block.7.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"411 | model.decoder.block.7.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"412 | model.decoder.block.7.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"413 | model.decoder.block.7.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"414 | model.decoder.block.7.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"415 | model.decoder.block.8                                                 | T5Block                    | 9 M   \\n\",\n            \"416 | model.decoder.block.8.layer                                           | 
ModuleList                 | 9 M   \\n\",\n            \"417 | model.decoder.block.8.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"418 | model.decoder.block.8.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"419 | model.decoder.block.8.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"420 | model.decoder.block.8.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"421 | model.decoder.block.8.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"422 | model.decoder.block.8.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"423 | model.decoder.block.8.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"424 | model.decoder.block.8.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"425 | model.decoder.block.8.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"426 | model.decoder.block.8.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"427 | model.decoder.block.8.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"428 | model.decoder.block.8.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"429 | model.decoder.block.8.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"430 | model.decoder.block.8.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"431 | model.decoder.block.8.layer.1.layer_norm       
                       | T5LayerNorm                | 768   \\n\",\n            \"432 | model.decoder.block.8.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"433 | model.decoder.block.8.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"434 | model.decoder.block.8.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"435 | model.decoder.block.8.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"436 | model.decoder.block.8.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"437 | model.decoder.block.8.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"438 | model.decoder.block.8.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"439 | model.decoder.block.8.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"440 | model.decoder.block.9                                                 | T5Block                    | 9 M   \\n\",\n            \"441 | model.decoder.block.9.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"442 | model.decoder.block.9.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"443 | model.decoder.block.9.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"444 | model.decoder.block.9.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"445 | model.decoder.block.9.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"446 | 
model.decoder.block.9.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"447 | model.decoder.block.9.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"448 | model.decoder.block.9.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"449 | model.decoder.block.9.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"450 | model.decoder.block.9.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"451 | model.decoder.block.9.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"452 | model.decoder.block.9.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"453 | model.decoder.block.9.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"454 | model.decoder.block.9.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"455 | model.decoder.block.9.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"456 | model.decoder.block.9.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"457 | model.decoder.block.9.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"458 | model.decoder.block.9.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"459 | model.decoder.block.9.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"460 | model.decoder.block.9.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   
\\n\",\n            \"461 | model.decoder.block.9.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"462 | model.decoder.block.9.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"463 | model.decoder.block.9.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"464 | model.decoder.block.9.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"465 | model.decoder.block.10                                                | T5Block                    | 9 M   \\n\",\n            \"466 | model.decoder.block.10.layer                                          | ModuleList                 | 9 M   \\n\",\n            \"467 | model.decoder.block.10.layer.0                                        | T5LayerSelfAttention       | 2 M   \\n\",\n            \"468 | model.decoder.block.10.layer.0.SelfAttention                          | T5Attention                | 2 M   \\n\",\n            \"469 | model.decoder.block.10.layer.0.SelfAttention.q                        | Linear                     | 589 K \\n\",\n            \"470 | model.decoder.block.10.layer.0.SelfAttention.k                        | Linear                     | 589 K \\n\",\n            \"471 | model.decoder.block.10.layer.0.SelfAttention.v                        | Linear                     | 589 K \\n\",\n            \"472 | model.decoder.block.10.layer.0.SelfAttention.o                        | Linear                     | 589 K \\n\",\n            \"473 | model.decoder.block.10.layer.0.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"474 | model.decoder.block.10.layer.0.dropout                                | Dropout                    | 0     \\n\",\n            \"475 | model.decoder.block.10.layer.1                                        | 
T5LayerCrossAttention      | 2 M   \\n\",\n            \"476 | model.decoder.block.10.layer.1.EncDecAttention                        | T5Attention                | 2 M   \\n\",\n            \"477 | model.decoder.block.10.layer.1.EncDecAttention.q                      | Linear                     | 589 K \\n\",\n            \"478 | model.decoder.block.10.layer.1.EncDecAttention.k                      | Linear                     | 589 K \\n\",\n            \"479 | model.decoder.block.10.layer.1.EncDecAttention.v                      | Linear                     | 589 K \\n\",\n            \"480 | model.decoder.block.10.layer.1.EncDecAttention.o                      | Linear                     | 589 K \\n\",\n            \"481 | model.decoder.block.10.layer.1.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"482 | model.decoder.block.10.layer.1.dropout                                | Dropout                    | 0     \\n\",\n            \"483 | model.decoder.block.10.layer.2                                        | T5LayerFF                  | 4 M   \\n\",\n            \"484 | model.decoder.block.10.layer.2.DenseReluDense                         | T5DenseReluDense           | 4 M   \\n\",\n            \"485 | model.decoder.block.10.layer.2.DenseReluDense.wi                      | Linear                     | 2 M   \\n\",\n            \"486 | model.decoder.block.10.layer.2.DenseReluDense.wo                      | Linear                     | 2 M   \\n\",\n            \"487 | model.decoder.block.10.layer.2.DenseReluDense.dropout                 | Dropout                    | 0     \\n\",\n            \"488 | model.decoder.block.10.layer.2.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"489 | model.decoder.block.10.layer.2.dropout                                | Dropout                    | 0     \\n\",\n            \"490 | model.decoder.block.11                         
                       | T5Block                    | 9 M   \\n\",\n            \"491 | model.decoder.block.11.layer                                          | ModuleList                 | 9 M   \\n\",\n            \"492 | model.decoder.block.11.layer.0                                        | T5LayerSelfAttention       | 2 M   \\n\",\n            \"493 | model.decoder.block.11.layer.0.SelfAttention                          | T5Attention                | 2 M   \\n\",\n            \"494 | model.decoder.block.11.layer.0.SelfAttention.q                        | Linear                     | 589 K \\n\",\n            \"495 | model.decoder.block.11.layer.0.SelfAttention.k                        | Linear                     | 589 K \\n\",\n            \"496 | model.decoder.block.11.layer.0.SelfAttention.v                        | Linear                     | 589 K \\n\",\n            \"497 | model.decoder.block.11.layer.0.SelfAttention.o                        | Linear                     | 589 K \\n\",\n            \"498 | model.decoder.block.11.layer.0.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"499 | model.decoder.block.11.layer.0.dropout                                | Dropout                    | 0     \\n\",\n            \"500 | model.decoder.block.11.layer.1                                        | T5LayerCrossAttention      | 2 M   \\n\",\n            \"501 | model.decoder.block.11.layer.1.EncDecAttention                        | T5Attention                | 2 M   \\n\",\n            \"502 | model.decoder.block.11.layer.1.EncDecAttention.q                      | Linear                     | 589 K \\n\",\n            \"503 | model.decoder.block.11.layer.1.EncDecAttention.k                      | Linear                     | 589 K \\n\",\n            \"504 | model.decoder.block.11.layer.1.EncDecAttention.v                      | Linear                     | 589 K \\n\",\n            \"505 | 
model.decoder.block.11.layer.1.EncDecAttention.o                      | Linear                     | 589 K \\n\",\n            \"506 | model.decoder.block.11.layer.1.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"507 | model.decoder.block.11.layer.1.dropout                                | Dropout                    | 0     \\n\",\n            \"508 | model.decoder.block.11.layer.2                                        | T5LayerFF                  | 4 M   \\n\",\n            \"509 | model.decoder.block.11.layer.2.DenseReluDense                         | T5DenseReluDense           | 4 M   \\n\",\n            \"510 | model.decoder.block.11.layer.2.DenseReluDense.wi                      | Linear                     | 2 M   \\n\",\n            \"511 | model.decoder.block.11.layer.2.DenseReluDense.wo                      | Linear                     | 2 M   \\n\",\n            \"512 | model.decoder.block.11.layer.2.DenseReluDense.dropout                 | Dropout                    | 0     \\n\",\n            \"513 | model.decoder.block.11.layer.2.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"514 | model.decoder.block.11.layer.2.dropout                                | Dropout                    | 0     \\n\",\n            \"515 | model.decoder.final_layer_norm                                        | T5LayerNorm                | 768   \\n\",\n            \"516 | model.decoder.dropout                                                 | Dropout                    | 0     \\n\",\n            \"517 | model.lm_head                                                         | Linear                     | 24 M  \\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": 
\"61d58772a6a64c5c8ad30dab2563a56f\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\r\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"26a0cb124049417aa9dbdd010e3af03a\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"1681a9ce7f9340caa50c4204777a6f9e\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:__main__:***** Validation results *****\\n\",\n            \"INFO:__main__:avg_val_loss = tensor(0.0846, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:loss = tensor(0.0290, 
device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:train_loss = tensor(0.0290, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:val_loss = tensor(0.0846, device='cuda:0')\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"ff475d6cdc074c14aa7b2cfede771b07\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:__main__:***** Validation results *****\\n\",\n            \"INFO:__main__:avg_train_loss = tensor(0.5601, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:avg_val_loss = tensor(0.0696, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:epoch = 0\\n\",\n            \"\\n\",\n            \"INFO:__main__:loss = tensor(0.0134, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:train_loss = tensor(0.0134, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:val_loss = tensor(0.0696, device='cuda:0')\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"1\"\n            ]\n          },\n          \"metadata\": {\n            
\"tags\": []\n          },\n          \"execution_count\": 70\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"GwdWdHG0RP5J\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"### Eval\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"dq7cCiOPRQzs\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"import textwrap\\n\",\n        \"from tqdm.auto import tqdm\\n\",\n        \"from sklearn import metrics\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"XKsHzqGMRQzz\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"dataset = EmotionDataset(tokenizer, 'emotion_data', 'test', 512)\\n\",\n        \"loader = DataLoader(dataset, batch_size=32, shuffle=True)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"QK7s7IpERQz5\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"it = iter(loader)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"outputId\": \"a49604ae-31da-49bc-9a90-bb5bd1366ebf\",\n        \"id\": \"5_79Jk36RQz-\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"source\": [\n        \"batch = next(it)\\n\",\n        \"batch[\\\"source_ids\\\"].shape\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"torch.Size([32, 
512])\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 74\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"RQZKyEaVRQ0B\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"outs = model.model.generate(input_ids=batch['source_ids'].cuda(), \\n\",\n        \"                              attention_mask=batch['source_mask'].cuda(), \\n\",\n        \"                              max_length=2)\\n\",\n        \"\\n\",\n        \"dec = [tokenizer.decode(ids) for ids in outs]\\n\",\n        \"\\n\",\n        \"texts = [tokenizer.decode(ids) for ids in batch['source_ids']]\\n\",\n        \"targets = [tokenizer.decode(ids) for ids in batch['target_ids']]\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"outputId\": \"93cdd40b-310f-458d-e5ae-21debf158a39\",\n        \"id\": \"aAjhiBcrRQ0E\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 1000\n        }\n      },\n      \"source\": [\n        \"for i in range(32):\\n\",\n        \"    c = texts[i]\\n\",\n        \"    lines = textwrap.wrap(\\\"text:\\\\n%s\\\\n\\\" % c, width=100)\\n\",\n        \"    print(\\\"\\\\n\\\".join(lines))\\n\",\n        \"    print(\\\"\\\\nActual sentiment: %s\\\" % targets[i])\\n\",\n        \"    print(\\\"predicted sentiment: %s\\\" % dec[i])\\n\",\n        \"    print(\\\"=====================================================================\\\\n\\\")\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"text: i feel like ive come a long way and im proud of what ive achieved not only this week but this\\n\",\n            
\"year as well\\n\",\n            \"\\n\",\n            \"Actual sentiment: joy\\n\",\n            \"predicted sentiment: joy\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i feel unfathomably rich in having had a healthy pregnancy so far\\n\",\n            \"\\n\",\n            \"Actual sentiment: joy\\n\",\n            \"predicted sentiment: joy\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: im just feeling emo and bitchy atm\\n\",\n            \"\\n\",\n            \"Actual sentiment: anger\\n\",\n            \"predicted sentiment: anger\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i always feel troubled when we re on the road touring living in a van or more recently in the\\n\",\n            \"circus buses no place to hang my hat as the song lyric has it\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i still feel confused and guilty about the whole thing\\n\",\n            \"\\n\",\n            \"Actual sentiment: fear\\n\",\n            \"predicted sentiment: fear\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i feel immensely distracted by the barrage of media i receive solicit\\n\",\n            \"\\n\",\n            \"Actual sentiment: anger\\n\",\n            \"predicted sentiment: anger\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: im feeling too tortured to write today\\n\",\n            
\"\\n\",\n            \"Actual sentiment: fear\\n\",\n            \"predicted sentiment: anger\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i have the joy of allowing kids to feel like the valued treasures that they are and to just\\n\",\n            \"have a blast being a kid alongside with them but can i just say its an incredibly humbling\\n\",\n            \"experience to have influence into a childs life and to know that what you do and say is being\\n\",\n            \"internalized\\n\",\n            \"\\n\",\n            \"Actual sentiment: joy\\n\",\n            \"predicted sentiment: joy\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i dont want flowers or candy but the kind of guy that knows i like thinly sliced limes in my\\n\",\n            \"mineral water because it makes me feel glamorous and is humored by how pretentious that is\\n\",\n            \"\\n\",\n            \"Actual sentiment: joy\\n\",\n            \"predicted sentiment: joy\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i just was expressing myself and her unexpected and kind gesture made me feel bad for a short\\n\",\n            \"moment as that was not my intent but for a larger moment which remains with me it reminded me of my\\n\",\n            \"blessings like having good friends that have your back\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: im feeling brave ill snatch him to on my lap and after a few seconds of struggling he\\n\",\n            \"completely relaxes and submits to mommy 
scratches\\n\",\n            \"\\n\",\n            \"Actual sentiment: joy\\n\",\n            \"predicted sentiment: joy\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: im sick of feeling unimportant like nobody needs me\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i feel like these unfortunate events fit in with my thought quote i posted above\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i feel like they don t think it s sincere when it really is she told us exclusively\\n\",\n            \"\\n\",\n            \"Actual sentiment: joy\\n\",\n            \"predicted sentiment: joy\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i feel a little low about being in japan and i always feel pangs of guilt when i fail to\\n\",\n            \"appreciate my living situation and decisions\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i couldn t help but feel pissed off at both sides of the debate and the unnecessary dichotomy\\n\",\n            \"itself\\n\",\n            \"\\n\",\n            \"Actual sentiment: anger\\n\",\n            \"predicted sentiment: anger\\n\",\n            \"=====================================================================\\n\",\n    
        \"\\n\",\n            \"text: i felt so bad for the bad grade and feeling like having to hide it that i didnt know what to\\n\",\n            \"say except to declare in all my frustration that i hated school\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i do feel proud and happy and also very grateful to all who read me\\n\",\n            \"\\n\",\n            \"Actual sentiment: joy\\n\",\n            \"predicted sentiment: joy\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i am going to have to check on in just a few minutes but there is this clock up above the\\n\",\n            \"screen that keeps ticking down the minutes i have left so am feeling a bit frantic\\n\",\n            \"\\n\",\n            \"Actual sentiment: fear\\n\",\n            \"predicted sentiment: fear\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i am feeling bitchy this evening\\n\",\n            \"\\n\",\n            \"Actual sentiment: anger\\n\",\n            \"predicted sentiment: anger\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i feel like my room is messy if theyre open\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: im starting to feel really pathetic giving the bulk of my enthusiasm these days to the\\n\",\n            \"kardashians us weekly and roseanne marathons and completely 
ignoring this blog\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i am feeling content and happy with myself\\n\",\n            \"\\n\",\n            \"Actual sentiment: joy\\n\",\n            \"predicted sentiment: joy\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i feel slightly saddened to know that some of the kids have also resigned during my absence\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i feel that passionate about\\n\",\n            \"\\n\",\n            \"Actual sentiment: joy\\n\",\n            \"predicted sentiment: joy\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i too feel a sense of melancholy for them\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i can t quite figure out how i feel i m not devastated like i was with lucy and i m not sure\\n\",\n            \"if that s because it s easier to do after the first time or what\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i feel ashamed of you\\n\",\n            \"\\n\",\n       
     \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i like the fresh feeling of sweet he gave me\\n\",\n            \"\\n\",\n            \"Actual sentiment: joy\\n\",\n            \"predicted sentiment: joy\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i feel so jaded and bored\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i feel fake because i think if you really want to have a good conversation and make good\\n\",\n            \"contact you have to appear especially self confident and even risk talking to some people which are\\n\",\n            \"no good to talk to at all until you meet one person which you have a good connection to\\n\",\n            \"\\n\",\n            \"Actual sentiment: sadness\\n\",\n            \"predicted sentiment: sadness\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\",\n            \"text: i am feeling pretty fearless\\n\",\n            \"\\n\",\n            \"Actual sentiment: joy\\n\",\n            \"predicted sentiment: joy\\n\",\n            \"=====================================================================\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"iq8M8nbTSJlE\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"#### Test Metrics\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        
\"id\": \"S-oIXmoCR6kl\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"98bdff55-aa82-45a3-dc13-be0e78e52ea9\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 66,\n          \"referenced_widgets\": [\n            \"8933ab7f935e4776970ddfe35f5da135\",\n            \"84eb2bf17a9048fc94b6f47867d1b0ba\",\n            \"cdd7554792cf4c73922e2f050d1fcaaf\",\n            \"a32aa193a82f478387c14f384c2c689e\",\n            \"e4cbd76c110541cbbf1386e299c4d9d6\",\n            \"da67548f1abc4727965f72b8cb367681\",\n            \"63b11aa7ee0c4271aedb87ad3e7d23c3\",\n            \"720b90b3f86c4e5da15447777806e9a7\"\n          ]\n        }\n      },\n      \"source\": [\n        \"dataset = EmotionDataset(tokenizer, 'emotion_data', 'test', 512)\\n\",\n        \"loader = DataLoader(dataset, batch_size=32, num_workers=4)\\n\",\n        \"model.model.eval()\\n\",\n        \"outputs = []\\n\",\n        \"targets = []\\n\",\n        \"for batch in tqdm(loader):\\n\",\n        \"  outs = model.model.generate(input_ids=batch['source_ids'].cuda(), \\n\",\n        \"                              attention_mask=batch['source_mask'].cuda(), \\n\",\n        \"                              max_length=2)\\n\",\n        \"\\n\",\n        \"  dec = [tokenizer.decode(ids) for ids in outs]\\n\",\n        \"  target = [tokenizer.decode(ids) for ids in batch[\\\"target_ids\\\"]]\\n\",\n        \"  \\n\",\n        \"  outputs.extend(dec)\\n\",\n        \"  targets.extend(target)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"8933ab7f935e4776970ddfe35f5da135\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, max=63.0), 
HTML(value='')))\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"C9CYCGM6SRzb\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"for i, out in enumerate(outputs):\\n\",\n        \"  if out not in emotions:\\n\",\n        \"    print(i, 'detected invalid prediction')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"outputId\": \"24a4fe9c-3396-4364-aad3-8da50d456618\",\n        \"id\": \"iE0WX_GbSRzq\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"source\": [\n        \"metrics.accuracy_score(targets, outputs)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"0.929\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 82\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"outputId\": \"01a97ad3-3c70-43b6-e6a4-55ea5ccfa010\",\n        \"id\": \"mWkOZ7BASRz5\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 235\n        }\n      },\n      \"source\": [\n        \"print(metrics.classification_report(targets, outputs))\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          
\"text\": [\n            \"              precision    recall  f1-score   support\\n\",\n            \"\\n\",\n            \"       anger       0.94      0.93      0.93       275\\n\",\n            \"        fear       0.86      0.92      0.89       224\\n\",\n            \"         joy       0.97      0.93      0.95       695\\n\",\n            \"        love       0.79      0.89      0.84       159\\n\",\n            \"     sadness       0.97      0.96      0.97       581\\n\",\n            \"    surprise       0.75      0.74      0.75        66\\n\",\n            \"\\n\",\n            \"    accuracy                           0.93      2000\\n\",\n            \"   macro avg       0.88      0.90      0.89      2000\\n\",\n            \"weighted avg       0.93      0.93      0.93      2000\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"W6p9MGb6lWL5\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"Now lets plot  the confusion matrix and see for which classes our model is getting confused\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"9RtgfuzucFeN\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"0dc41da4-f99e-4469-8d0c-f055d4a18a8d\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 70\n        }\n      },\n      \"source\": [\n        \"import seaborn as sn\\n\",\n        \"import pandas as pd\\n\",\n        \"import matplotlib.pyplot as plt\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. 
Use the functions in the public API at pandas.testing instead.\\n\",\n            \"  import pandas.util.testing as tm\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"2ioVvq5rcHZE\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"cm = metrics.confusion_matrix(targets, outputs)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"4rM5XS09SSdm\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"171788f5-4c43-485c-b84a-133ad78e2486\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 462\n        }\n      },\n      \"source\": [\n        \"df_cm = pd.DataFrame(cm, index = [\\\"anger\\\", \\\"fear\\\", \\\"joy\\\", \\\"love\\\", \\\"sadness\\\", \\\"surprise\\\"], columns = [\\\"anger\\\", \\\"fear\\\", \\\"joy\\\", \\\"love\\\", \\\"sadness\\\", \\\"surprise\\\"])\\n\",\n        \"plt.figure(figsize = (10,7))\\n\",\n        \"sn.heatmap(df_cm, annot=True, cmap='Purples', fmt='g')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:numexpr.utils:NumExpr defaulting to 4 threads.\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"<matplotlib.axes._subplots.AxesSubplot at 0x7f213a4498d0>\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 86\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAiYAAAGbCAYAAADwcltwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzdeZgU1fX/8feZGdZBhn1AHWRVf4qSKBpxRxQVUBnZXCIuCEkElCRKNOCSRIMxi4p+VcANN1xwQ0WIigqKCkgUNCSCRBSEAVkFRGDm/P7oAgdlZgrsnqru+bx46pmq21Vdp4vu6tP33rpl7o6IiIhIHGRFHYCIiIjIdkpMREREJDaUmIiIiEhsKDERERGR2FBiIiIiIrGRk+odTJ78iS77CaHzSa2jDiFtZGVZ1CGIiJQrO6dyT1Qn2nVJ+659w/8Y6UlWNSYiIiISGymvMREREZHUMsucmmTVmIiIiEhsqMZEREQk3WVOhYkSExERkXRnGXRRgJpyREREJDZUYyIiIpLmMqjvqxITERGRtJdBmYmackRERCQ2VGMiIiKS5jKowkSJiYiISLrTVTkiIiIiKaAaExERkXSXQW05SkxERETSXAblJWrKERERkfhQjYmIiEiay6S7CysxERERSXeZk5eoKUdERETiQzUmIiIiaS6TxjFRYiIiIpLmMqiLiZpyREREJD5UYyIiIpLuMqjKRImJiIhImsugvERNOSIiIhKemdUzswlm9h8zm29mHc2sgZm9YmYLgr/1g3XNzEaZ2UIzm2tmh1X0/EpMRERE0pxlWdKmEG4HJrv7gUB7YD5wNfCau7cFXguWAU4H2gbTQODuip5ciYmIiEi6M0veVO5uLA84HrgPwN23uPta4CxgXLDaOKBHMH8W8JAnvAvUM7Nm5e1DiYmIiIjsYGYDzWx2qWlgqYdbAiuBB8zsX2Z2r5nlAvnuvixYZzmQH8zvA3xRavslQVmZMrLz65o1K3nkkVv5+uu1mEHHjqdx4oln8vLLj/HOO1OoUycPgG7d+nHwwR1YtaqIkSMvo0mTxLHab78D6Nt3UJQvIRIjrh3OtGlv0qBBA557diIAd9wxiqmvTyUry2jQoCE33fhnmjRpEnGk8bFs2TKuueZqvlq1CjPo07sPF1zQL+qwYmn69OmMvPnPFBeX0KtnLwYMGBB1SLF18imdyc3NJSsrm5ycbJ56ckLUIcWS3lPfSWbnV3cfA4wp4+Ec4DBgiLu/Z2a3812zzfbt3cx8T/efkYlJVlY2PXpcQkFBGzZv3sTf/vZrDjzwJwCceOJZnHTS2T/YpmHDpgwbNqqyQ42VHmcVct655/P74d+9xy6++BKGDLkcgEcefZi777mL66+7IaII4ycnJ5thw4Zx0EEHs3HjRnr17knHjkfTpk2bqEOLleLiYm686U/cO/Y+8vPz6du3D506ddJxKseDD4yjfv36UYcRW3pP7awSb+K3BFji7u8FyxNIJCZFZtbM3ZcFTTUrgseXAgWltt83KCtThU05QY/agorWi5O8vAYUFCTenDVr1iY/v4C1a1dFHFX8dejQgby8vJ3K6tSps2P+m2++yag7WCZD48ZNOOiggwHIzc2lVavWrFhRFHFU8TNv3lyaFzSnoKCA6tWrc3rXrkx9fWrUYUka03sqGu6+HPjCzA4IijoD/wYmAhcGZRcCzwfzE4F+QS5xFLCuVJPPLlVYYxJUyUwCDtmD1xC5VauKWLLkU1q0OID//W8+06e/xMyZr9O8eRt69OhP7dqJL97Vq4u45ZYrqFmzFt26XUDr1gdHHHl83D7qNiZOnMhee9Xh/vsejDqc2Fq6dCnz58/n0EPbRx1K7BQVraBps6Y7lpvm5zN37twII4o3M+PSAf0xM/r07kufPn2iDil29J76nsr9zTgEeNTMqgOLgItJVHQ8aWb9gcXA9jftJKArsBDYFKxbrrCdX+eY2RFhIy7dcWbSpC
fCbpZ03377DfffP5Kzzx5AzZq1OeaY07n22jEMG3Y7devW57nn7gMSNSw33HA/w4bdTmHhpTz00N/YvHlTZHHHzRWXD+W1V6fSrVt3Hhv/aNThxNLGjRu5YujlXHP11TvVMonsiUcefpSnJzzD6HvGMH78Y8yePSvqkCTmKvNyYXf/wN07uPuh7t7D3de4+yp37+zubd39ZHdfHazr7j7I3Vu7+yHuPrui5w+bmPwMeMfMPg0GSJlnZmWmpu4+Jgi6Q9eufUPuIrmKi7dx//0j6dDhRNq3PxqAunXrk5WVTVZWFh07nsrixZ8AkJNTjdzcugAUFLShUaOmrFhRbhNYldS9W3deffWVqMOIna1btzJ06BV073YGp5zSJepwYik/vwnLly3fsby8qIgm+fnlbFG15QfHpmHDhnQ++WTmzpsXcUTxo/fU91gSp4iFTUxOBVoDJwFnAN2Dv7Hk7owfP4r8/AI6deqxo3zdutU75ufOfYdmzfYDYMOGdZSUFAPw1VfLWbnySxo2bIrA4sWf7ZifOnUqLVu2ii6YGHJ3rr1uBK1ateKiiy6KOpzYatfuEBZ/vpglS5awZcsWXp40iU6dOkUdVixt2rSJjRs37pifMeNt2rZpG3FU8aP3VOYKdVWOuy82s2OBtu7+gJk1BmJbX71o0b+ZNet1mjVrwS23JK4o6datH3PmvMnSpf8DjIYNm9CnT+KS4IULP+Lllx8lOzsn0abbZxC5uXtF+AqicdWwK5k1ayZr166lc+dOXDZoMNOnT+Ozz/6HWRZ777031117fdRhxsqcOXOYOHEi+++/P4VnFwIwdOhQTjj+hIgji5ecnByGDx/BgIGXUlJSQmHh2fqyLcOqVau4/PIhAGwr3ka3bt057rjjIo4qfvSe2lkmXZhg7hVfamxm1wMdgAPcfX8z2xt4yt2PqWjbyZM/2eNrmauSzie1jjqEtJEVbshkEZHIZOdU7omqR/O/Je279rnPr4z0JBu2KacQOBPYCODuXwJVr0pBREREUirsAGtbSo/kFgw/KyIiInGQQTeYCZuYPGlmo0ncfGcAcAkwNnVhiYiISFiZ1MckbOfXv5nZKcB64ADgOnfXdaMiIiKSVKHvlRMkIkpGREREYiaDKkzCJSZm9jXw/R6/64DZwG/dfVGyAxMREZGQMigzCVtjchuJOwo+RmJcuHNIDLg2B7gfODEVwYmIiEjVEjYxOdPdS9+ZbIyZfeDuvzOz36ciMBEREQkngypMQicmm8ysDzAhWO4FbA7mNYCaiIhIhMLcfC9dhL3y+XzgAmAFUBTM/9zMagGDUxSbiIiIVDFhLxdeRNk37XsreeGIiIjIbsugtpywV+U0BgYALUpv4+6XpCYsERERCSuD8pLQfUyeB6YDrwLFqQtHREREqrKwiUltd/9dSiMRERGRPZJJQ9KH7fz6opl1TWkkIiIismeykjhFLGwIV5BITr4xs/Vm9rWZrU9lYCIiIlL1hL0qZy8zawC0BWqmNiQRERHZHZnUlBP2qpxLSdSa7At8ABwFzAA6py40ERERCSOTEpPdaco5Aljs7p2An5K4iZ+IiIhI0oS9Kmezu282M8yshrv/x8wOSGlkIiIiEorFoNNqsoRNTJaYWT3gOeAVM1sDLE5dWCIiIhJaBjXlhO38WhjM3mBmrwN5wOSURSUiIiJVUtgakx3c/c1UBCIiIiJ7JoMqTHY/MREREZF4sazMyUwyqLuMiIiIpDvVmIiIiKS7DGrLSXlicsrJbVK9i4zw/pylUYeQNg4/bJ+oQxCpkkpKPOoQ0kZ2Je8vg/ISNeWIiIhIfKgpR0REJM1lUudXJSYiIiLpLoPactSUIyIiIrGhGhMREZE0l0EVJkpMRERE0l0m9TFRU46IiIjEhmpMRERE0l3mVJgoMREREUl3lkGdTNSUIyIiIrGhGhMREZE0l0mdX5WYiIiIpLkMaslRU46IiIjEh2pMRERE0l0GVZkoMREREUlzmd
THRE05IiIiEhuqMREREUlzGdSSo8REREQk7WVQZqKmHBEREYkNJSYiIiJpzsySNoXY12dmNs/MPjCz2UFZAzN7xcwWBH/rB+VmZqPMbKGZzTWzwyp6fiUmIiIiac6ykjeF1Mndf+LuHYLlq4HX3L0t8FqwDHA60DaYBgJ3V/TESkxERETkxzoLGBfMjwN6lCp/yBPeBeqZWbPynkiJiYiISLozS9pkZgPNbHapaeD39ubAP83s/VKP5bv7smB+OZAfzO8DfFFq2yVBWZl0VY6IiEiaS+ZFOe4+BhhTzirHuvtSM2sCvGJm//ne9m5mvqf7V2IiIiKS5ipz5Fd3Xxr8XWFmzwJHAkVm1szdlwVNNSuC1ZcCBaU23zcoK5OackRERCQUM8s1s722zwNdgI+AicCFwWoXAs8H8xOBfsHVOUcB60o1+eySakxERETSXeUNsJYPPBtcVpwDPObuk81sFvCkmfUHFgN9gvUnAV2BhcAm4OKKdlClEpPhI4bz5ptv0KBBAyY+/0LU4URu1aoVjB79F9atW4OZ0alTN0499Ww2bFjPnXfeyFdfFdGoUT5DhlxLbu5evPTSE8yYMRWA4uJivvzyc+66awJ16tSN+JVEa/r06Yy8+c8UF5fQq2cvBgwYEHVIsaTjFN7Jp3QmNzeXrKxscnKyeerJCVGHFAsjrh3OtGlv0qBBA557diIAU6ZM5q67/49FixYxfvwTtDu4XcRRRqOy8hJ3XwS030X5KqDzLsodGLQ7+6hSiUlhjx6cf955XH3N1RWvXAVkZ2dz3nm/pEWLtnzzzSauu+5XtGt3ONOmTeHgg3/KGWecywsvjOeFFx7nnHMG0K1bX7p16wvAnDnvMHny01U+KSkuLubGm/7EvWPvIz8/n759+9CpUyfatGkTdWixouO0+x58YBz169ePOoxY6XFWIeedez6/H/7dObxN27bcduso/vDHG6ILTJKqSvUx6dDhCPLy6kUdRmzUq9eQFi3aAlCrVm323rs5q1d/xZw5MzjuuC4AHHdcF95//+0fbPvuu1Pp2LFTpcYbR/PmzaV5QXMKCgqoXr06p3ftytTXp0YdVuzoOEkydOjQgby8vJ3KWrdqTcuWLSOKKD4sy5I2Ra3cxMTMsr9/GZBkppUrl7N48ULatDmQ9evXUK9eQwDy8hqwfv2andb99tvNzJ07myOOOC6KUGOlqGgFTZs13bHcND+fFUVFEUYUTzpOu8fMuHRAf3r17smTTz4ZdTiSDpI4jknUyk1M3L0Y+K+ZNd+dJy09OMvYseVdCi1xsHnzN4wa9QfOP/8yatXK3emxRAennd+o//rXO7Rte3CVb8YRSZVHHn6Upyc8w+h7xjB+/GPMnj0r6pBEKk2YPib1gY/NbCawcXuhu59Z1galB2cp3layx4OsSOpt27aNUaNu4OijO++oAalbtz5r166iXr2GrF27irp1d27+evfdN9SME8jPb8LyZct3LC8vKqJJfn45W1RNOk67Jz84Ng0bNqTzySczd948OnQ4IuKoJM5iUNGRNGH6mFwLdAf+CPy91CRpzt25996/sffe+3H66b12lB92WEemT/8nANOn/5PDDjt6x2ObNm3gP/+Zu1NZVdau3SEs/nwxS5YsYcuWLbw8aRKdOilp+z4dp/A2bdrExo0bd8zPmPE2bdu0jTgqibtM6mNSYY2Ju79ZGYFUhiuv/C0zZ81k7dq1dDrpRAYPGkzPnr0q3C5TffLJR7z99qsUFLRk+PBfANC79yV0734Od955I2++OZlGjZowePC1O7aZPftt2rU7nJo1a0UVdqzk5OQwfPgIBgy8lJKSEgoLz9aXyC7oOIW3atUqLr98CADbirfRrVt3jjtO/bkArhp2JbOCc3jnzp24bNBg8vLyGPnnm1i9ZjWXXfYrDjzwQMaMHht1qPIjWOIS43JWSIzUdgfw/4DqQDaw0d1DdTBQU044788pd4ReKeXww8q9/5OIpEiJTuehVa
ueXalVD0N6P5a0/5w7njov0mqTMH1M7gTOAZ4COgD9gP1TGZSIiIjshuhbYJIm1Dgm7r4QyHb3Ynd/ADgttWGJiIhIVRSmxmSTmVUHPjCzW4BlVLGB2UREROIsDp1WkyVMgnFBsN5gEpcLFwA9UxmUiIiIhGdmSZuiFuaqnMVmVgto5u5/qISYREREpIqqsMbEzM4APgAmB8s/MbOJqQ5MREREQsqy5E1Rv5QQ69wAHAmsBXD3DwDdMUlERCQmMuhWOaESk63uvu57ZbqYXURERJIuzFU5H5vZeUC2mbUFLgdmpDYsERERCSsOnVaTpcwaEzN7OJj9FDgY+BYYD6wHhqY+NBEREQklg/qYlFdjcriZ7Q30BTqx8437agObUxmYiIiIVD3lJSb3AK8BrYDZpcqNRB+TVimMS0RERELKoJacshMTdx8FjDKzu939V5UYk4iIiOyGKjXyq5ISERERqSxhrsoRERGROMugthwlJiIiImmuSlwuLCIiIlLZVGMiIiKS5iyDqhmUmIiIiKQ5NeWIiIiIpIBqTERERNJdBtWYKDERERFJc5nUxySDXoqIiIikO9WYiIiIpLlM6vyqxERERCTdVaV75YiIiIhUFtWYiIiIpDk15UjSHX7YPlGHkDY6V7sh6hDSwssbRkQdQlqoXkOnwbCyMqi5INNkUF6iphwRERGJD/1UEBERSXcZVJulxERERCTNZVIfEzXliIiISGyoxkRERCTNZVCFiRITERGRtJdBfUzUlCMiIiKxoRoTERGRNJdJnV+VmIiIiKQ5U1OOiIiISPKpxkRERCTdZU6FiRITERGRdJdJfUzUlCMiIiKxoRoTERGRNKfOryIiIhIbZpa0KeT+ss3sX2b2YrDc0szeM7OFZvaEmVUPymsEywuDx1tU9NxKTERERNKdJXEK5wpgfqnlvwC3unsbYA3QPyjvD6wJym8N1iuXEhMREREJzcz2BboB9wbLBpwETAhWGQf0CObPCpYJHu9sFVTLKDERERFJc8lsyjGzgWY2u9Q08Hu7uw0YBpQEyw2Bte6+LVheAuwTzO8DfAEQPL4uWL9M6vwqIiKS5pJ5tbC7jwHG7Ho/1h1Y4e7vm9mJydvrd5SYiIiISFjHAGeaWVegJlAXuB2oZ2Y5Qa3IvsDSYP2lQAGwxMxygDxgVXk7UFOOiIhImjNL3lQed7/G3fd19xbAOcBUdz8feB3oFax2IfB8MD8xWCZ4fKq7e3n7UI2JiIhImovByK+/Ax43sxuBfwH3BeX3AQ+b2UJgNYlkplxKTERERGS3ufsbwBvB/CLgyF2ssxnovTvPq8REREQkzUVfYZI8SkxERETSXAyacpJGnV9FREQkNlRjIiIikuYyqMKkaiUm3377Lf36XcCWLVvYVryNLl1OZcjgIVGHFUvTp09n5M1/pri4hF49ezFgwICoQ4pUnbyaXHXvWbRs1wR3+Mslz/Hvd78AoM9vjuayv5/GWY1uZt2qTQD85IQWDL7tdLKrZbPuq00MPfH+KMOPxLfffssll17E1i1b2FZczMmdT+GyXw3ivffe5dbb/0FJSQm1a9fmjzfcSPPmzaMONzZ0ngrv5FM6k5ubS1ZWNjk52Tz15ISKN8pQmdSUU6USk+rVq3P//Q+Qm5vL1q1b+fkFP+f4446jffufRB1arBQXF3PjTX/i3rH3kZ+fT9++fejUqRNt2rSJOrTIDL79dGZOXsD1vZ8gp1o2NWtXA6DxvnXp0KUNyxev3bFunbyaDL2rO8NOe5gVX6yjXuPcqMKOVPXq1Rk7+j5q167N1q1bubj/hRx7zLHcNPJGbvvHKFq1asUTTz7O2PtG86c/3BR1uLGh89TuefCBcdSvXz/qMCSJKuxjYmZnmFlG9EUxM3JzE18S27ZtY9u2rZlV/5Uk8+bNpXlBcwoKCqhevTqnd+3K1NenRh1WZHLr1qD98S146b45AGzbWsyGdZsBGHzr6YweNgVKjRfU+bxDmP7MfF
Z8sQ6AtSs3Vn7QMWBm1K5dG9j+edu2414cGzduAGDDhg00btQkyjBjR+cp2ROVNcBaZQhTY9IXuM3Mngbud/f/pDimlCouLqZX7158/vnnnHfuubQ/tH3UIcVOUdEKmjZrumO5aX4+c+fOjTCiaDVrWZ+1Kzdy9QOFtG7flE/e/5I7rpjE4Se3ZuXS9Xw6t2in9Qv2b0R2tSxue/1iau1Vg6dvf4d/PvxhRNFHq7i4mHPP78sXX3xO3z7ncMghh3L9tTcw+PLLqFGjBnVy6/DQuEejDjN2dJ4Kx8y4dEB/zIw+vfvSp0+fqEOKjBGDjCJJKqwJcfefAz8FPgUeNLN3gjsP7lXWNqXvTDh27C7vAxSZ7Oxsnn3mWV6f+jrz5s1jwYJPog5JYi47J4v9D2vG83fPYsBhd/PNxi1cdEMnzv/98Txw3Q9rkrJzsjjg8L25utsjDDv1IfpdeyL7ti33ZpoZKzs7mycfn8CUya/y0ccfsXDhAh559GHuHHUX/5z8Gmee2YO//+OvUYcZOzpPhfPIw4/y9IRnGH3PGMaPf4zZs2dFHZIkQagmGndfD0wAHgeaAYXAHDPbZY8sdx/j7h3cvcOAAd+/W3I81K1blyOPPJLpb70VdSixk5/fhOXLlu9YXl5URJP8/AgjitbKJetZuWQ982cuAeDNCf+m7WF706xlPe778DIe/9+vabxvXcbM+SUN8uuwcsl6Zk5ZyOZNW1m3ahMfTvuM1u2bVrCXzFZ3r7oc0eEI3nr7LT5Z8F8OOeRQAE7tchoffvhBxNHFl85T5csPzksNGzak88knM3fevIgjik4mNeWE6WNyppk9S2LY2WrAke5+OtAe+G1qw0uu1atXs379egA2b97MjHfeoVXLlhFHFT/t2h3C4s8Xs2TJErZs2cLLkybRqVOnqMOKzOqiDaz4Yj0F+ydqPQ7v3IoFc76kMP8Wzml5K+e0vJWVS9Yz8LB7WF20gbeen88hx+5HdnYWNWpV46Cf7cvn81dG/Coq3+o1q1n/9Xeft3fffZdWLVuxYcMGFi/+DIB333uHli1bRRhl/Og8Fc6mTZvYuHHjjvkZM96mbZu2EUcVnUxKTML0MekJ3Oru00oXuvsmM+ufmrBSY+XKlVzz+2soKSmmpKSE0049jRNPrLpfuGXJyclh+PARDBh4KSUlJRQWnl2lP/AAo4a8xIhHe5FTPZtli9Zw88XPlrnu5//5ipmTF3Df3MvwEuele+fwv49XVGK08fDVypVce/0ISoqLKXGnyyldOP74E7huxA389qpfk2VZ7FW3Ln+4/o9RhxorOk+Fs2rVKi6/PFFpv614G926dee4446LOCpJBqvg7sOJlczygSOCxZnuHvosW7ytpOIdiOyGztVuiDqEtPDyhhFRh5AWqteoUqMmSCXJzsmq1LqHf9zyZtK+a38z7IRI603CNOX0BmaSuDtgH+A9M+uV6sBEREQknKrWlDMCOGJ7LYmZNQZeJdEZVkRERCRpwiQmWd9rulmFbv4nIiISH3Go6kiSMInJZDObAowPls8BXk5dSCIiIrI7MigvqTgxcferzOxs4Jig6B53fy61YYmIiEhVVGZiYmZvufuxZvY14LBjvNuBZlYCrAb+6u53VUKcIiIiUoYqcXdhdz82+LvLoefNrCEwA1BiIiIiEqEMykv2vBOru68CTkxeKCIiIlLV/aiRhdx9WbICERERkT1TJZpyREREJD1kUF6i8UhEREQkPlRjIiIikuYyqMJEiYmIiEi6y6Q+JmrKERERkdhQjYmIiEiay6AKEyUmIiIi6U5NOSIiIiIpoBoTERGRNJdBFSZKTERERNKdmnJEREREUkA1JiIiImkugypMlJiIiIiku0xKTNSUIyIiIrGhGhMREZE0l0mdX5WYiIiIpLkMykvUlCMiIiLxoRoTSTuvfHt91CGkhaVfros6hLRQsG+9qEMQ+dHUlCMiIiLxkTl5iZpyREREJD5UYyIiIpLm1J
QjIiIisaHERERERGIjg/IS9TERERGR+FCNiYiISJpTU46IiIjERgblJWrKERERkfhQjYmIiEiay6SmHNWYiIiIpDkzS9pUwX5qmtlMM/vQzD42sz8E5S3N7D0zW2hmT5hZ9aC8RrC8MHi8RUWvRYmJiIiIhPUtcJK7twd+ApxmZkcBfwFudfc2wBqgf7B+f2BNUH5rsF65lJiIiIikObPkTeXxhA3BYrVgcuAkYEJQPg7oEcyfFSwTPN7ZKqiWUWIiIiKS5pLZlGNmA81sdqlp4Pf2lW1mHwArgFeAT4G17r4tWGUJsE8wvw/wBUDw+DqgYXmvRZ1fRUREZAd3HwOMKefxYuAnZlYPeBY4MJn7V2IiIiKS5iyr8q/Kcfe1ZvY60BGoZ2Y5Qa3IvsDSYLWlQAGwxMxygDxgVXnPq6YcERGRNFdZfUzMrHFQU4KZ1QJOAeYDrwO9gtUuBJ4P5icGywSPT3V3L28fqjERERGRsJoB48wsm0TlxpPu/qKZ/Rt43MxuBP4F3Besfx/wsJktBFYD51S0AyUmIiIiaa6yBlhz97nAT3dRvgg4chflm4Heu7MPJSYiIiJpLoMGflUfExEREYkP1ZiIiIikuUy6V44SExERkTSXSYmJmnJEREQkNlRjIiIikuYyqMJEiYmIiEjay6DMRE05IiIiEhtVKjFZtmwZF110Id3P6M4ZZ3bn4Ycfijqk2Jo+fTpdu53OqaedytixY6MOJ1ZGXDuc4084lh6FZ/7gsQfHPUC7Qw5izZo1EUQWvb///Ub69OnKwIHn/+CxCRMe49RTO7Ju3VoApk6dwi9/+XN+8YvzGTp0AJ9+uqCyw42l4SOGc+xxx3DmWWdEHUrs6Tz1nWTeXThqVSoxycnJZtiwYbz4wos8Pv4JHhv/GAsXLow6rNgpLi7mxpv+xOh7xvDCxBeYNOklHadSepxVyD13//DGm8uWL2PGjBk0a9YsgqjioUuXbtx0060/KF+xoog5c2bSpEnTHWX5+c3461/vYvToRzn//Eu4/fabKzPU2Crs0YMxo8u8sasEdJ7aWWXdK6cyVKnEpHHjJhx00MEA5Obm0qpVa1asKIo4qviZN28uzQuaU1BQQPXq1StI0YEAACAASURBVDm9a1emvj416rBio0OHDuTl5f2g/JZb/sJvfvPbWPziiMohh/yUvfaq+4Py0aNvp3//QTud9A4++NAd6x544MF89dWKygoz1jp0OIK8vHpRhxF7Ok9lrtCJiZntZ2YnB/O1zGyv1IWVekuXLmX+/Pkcemj7qEOJnaKiFTRt9t0v26b5+awoUgJXnqlTX6NJkyYceMCBUYcSOzNmTKNRo8a0bt22zHUmT36BI47oWIlRSbrTeWpnlmVJm6IWKjExswHABGB0ULQv8Fw56w80s9lmNnvs2PhVSW7cuJErhl7ONVdfTZ06daIOR9LcN998w9h7xzB40JCoQ4mdzZs38/jj4+jXb0CZ63zwwftMmfIC/fsPqsTIRDJLJjXlhL1ceBCJuwa+B+DuC8ysSVkru/sYYAxA8bYS/7FBJtPWrVsZOvQKunc7g1NO6RJ1OLGUn9+E5cuW71heXlREk/z8CCOKty+++IKlS5fSs1chAEVFRfTu05PHxz9Bo0aNI44uWsuWLWH58mX86lcXALBy5UoGDbqIUaPuo0GDhixatJDbbhvJjTf+g7p1f9g8JlIWnacyV9jE5Ft337K97dzMcoBYJRxhuDvXXjeCVq1acdFFF0UdTmy1a3cIiz9fzJIlS2jSpAkvT5rELX/9a9Rhxdb+++/PtDff2rHc5dSTeeLxp6hfv36EUcVDy5ZtePLJSTuW+/Ur5I47HiAvrx4rViznj3+8mquuuo59920eYZSSjnSe2lkm9W0Lm5i8aWa/B2qZ2SnAZcALqQsrNebMmcPEiRPZf//9KTw78et26NChnHD8CRFHFi85OTkMHz6CAQMvpaSkhMLCs2nbpuz+AVXNVcOuZNasma
xdu5bOnTtx2aDB9Dy7Z9RhxcLIkdcxd+4c1q1by/nnn8kFF1zKaaf98LJqgEcfvZ+vv17PnXf+DYDs7GzuvPOBygw3lq688rfMDN5fnU46kcGDBtOzZ6+ow4odnad2lkmJiblXXPFhZllAf6ALYMAU4F4PsXHcmnIk/ZXoLRXK0i/XRR1CWijYV1fASPJl51RuL9KXXvpP0k6M3bodGGmWE7bGpAfwkLtX7RFsREREYiiDKkxCXy58BvCJmT1sZt2DPiYiIiISA1Vu5Fd3vxhoAzwFnAt8amb3pjIwERERqXpC13y4+1Yze5nE1Ti1SDTvXJqqwERERCScONR0JEvYAdZON7MHgQVAT+BeoGm5G4mIiEilqIoDrPUDngB+4e7fpjAeERERqcJCJSbufq6Z5QOnBNVFM91dd9wSERGJgarYlNMbmAn0BvoA75mZRvwRERGJgUy6KidsU84I4IjttSRm1hh4lcSN/URERESSImxikvW9pptVhB8DRURERFIoBhUdSRM2MZlsZlOA8cFyX2BSOeuLiIhIJbHKHQE/pcJ2fr3KzHoCxwRFY9z92dSFJSIiIlXR7gyw9jTwdApjERERkT1QZZpyzOxrEiO9/uAhwN29bkqiEhERkdCMzMlMyk1M3H2vygpERERERHcJFhERSXeZU2GixERERCTdxWFgtGTRWCQiIiISG6oxERERSXMZVGGixERERCTdqSlHREREJAVUYyIiIpLmMqjCRImJiIhIusukphwlJiIiImkug/IS9TERERGR+FCNiYiISJpTU85uKCnZ1T0A5fsy6D2VcllZOlhhFOxbL+oQ0sK57e+IOoS0Mf7DIVGHIGXIpO8QNeWIiIhIbKgpR0REJM1lUo2JEhMREZE0Zxl0e2E15YiIiEhsqMZEREQkzakpR0RERGIjky4XVlOOiIiIhGJmBWb2upn928w+NrMrgvIGZvaKmS0I/tYPys3MRpnZQjOba2aHVbQPJSYiIiJpzix5UwW2Ab9194OAo4BBZnYQcDXwmru3BV4LlgFOB9oG00Dg7op2oMREREQkzZlZ0qbyuPsyd58TzH8NzAf2Ac4CxgWrjQN6BPNnAQ95wrtAPTNrVt4+lJiIiIjIDmY20Mxml5oGlrFeC+CnwHtAvrsvCx5aDuQH8/sAX5TabElQViZ1fhUREUlzyez76u5jgDHl78/qAE8DQ919femaFnd3M9vj+9EoMREREUlzlXlVjplVI5GUPOruzwTFRWbWzN2XBU01K4LypUBBqc33DcrKpKYcERERCcUSGdB9wHx3/0ephyYCFwbzFwLPlyrvF1ydcxSwrlSTzy6pxkRERCTdVV6FyTHABcA8M/sgKPs9cDPwpJn1BxYDfYLHJgFdgYXAJuDiinagxERERCTNVVZTjru/RdlpUOddrO/AoN3Zh5pyREREJDZUYyIiIpLmMmhEeiUmIiIi6U73yhERERFJAdWYiIiIpLnMqS9RYiIiIpL21JQjIiIikgKqMREREUlzGVRhosREREQk3akpR0RERCQFVGMiIiKS5jKowkSJiYiISLrLpMRETTkiIiISGxlfYzLi2uFMm/YmDRo04LlnJwIwZcpk7rr7/1i0aBHjxz9Bu4PbRRxl/Dz88MNMePop3J1evXrT74J+UYcUS8uWLeOaa67mq1WrMIM+vftwgY7VLk2fPp2RN/+Z4uISevXsxYABA6IOKVJ3/vNiNm/cQkmJU7ythGv6Pk7vy35G517tWL/mGwDG3zaDf03/jOxqWQy8vjOtD25CiTsPjnyTf89aGvEriJ7eU9/JpM6vGZ+Y9DirkPPOPZ/fD796R1mbtm257dZR/OGPN0QXWIwtWLCACU8/xePjn6BatWr84pcDOeGEE9iv+X5RhxY7OTnZDBs2jIMOOpiNGzfSq3dPOnY8mjZt2kQdWqwUFxdz401/4t6x95Gfn0/fvn3o1KlTlT9Of7j4ab5eu3mnspce+hcvPDhnp7KTey
V+PF1Z+Ch1G9Ti9/ecxTV9H8e90kKNHb2ndpZBecnuN+WYWX0zOzQVwaRChw4dyMvL26msdavWtGzZMqKI4m/Rok859JBDqVWrFjk5OXTocASvvvpq1GHFUuPGTTjooIMByM3NpVWr1qxYURRxVPEzb95cmhc0p6CggOrVq3N6165MfX1q1GGljX1bN+Cj974AYP3qb9j49RZatcuPOKpo6T2VuUIlJmb2hpnVNbMGwBxgrJn9I7WhSVTatGnL+3PeZ+3atXzzzTdMnz6N5cuXRR1W7C1dupT58+dz6KHtow4ldoqKVtC0WdMdy03z81lRVMUTOHeGjy3k5ifPoXPv75qTTz2vPX995nx+9aeTya1bA4DP/vsVHTq1IivbaLxPXVod1IRGTfeKKvJY0HtqZ2aWtClqYZty8tx9vZldCjzk7teb2dyyVjazgcBAgLv+724uvbTqtvulo9atW9P/kksZMPBSatWqxYEHHEhWVnbUYcXaxo0buWLo5Vxz9dXUqVMn6nAkDVx7wVOsWbGRug1qMeLeQr5ctJp/PjGPCffMBHf6DulIv6uO4+5rX+X1Zz5m31YNuPnJc1n55Xr++8EySopLon4JIikRNjHJMbNmQB9geEUru/sYYAzA1i3FVbgVNH317NmTnj17AnDbbbeS37RpBVtUXVu3bmXo0Cvo3u0MTjmlS9ThxFJ+fhOWL1u+Y3l5URFN8qt2U8SaFRuBRNPMrFc/pc0hTZn//pc7Hn9twkf87q4zASgpdsb9ZdqOx/70SG++XLy2cgOOGb2nMlfYPiZ/BKYAC919lpm1AhakLiyJ2qpVqwD4ctmXvPraq3Tr2i3iiOLJ3bn2uhG0atWKiy66KOpwYqtdu0NY/PlilixZwpYtW3h50iQ6deoUdViRqVErh5q1q+2YP/To5ny+cBX1GtXesc6RJ7fhiwWJz2H1mjnUqJX4HXlIx+YUFztLP11d+YHHiN5TO6tyTTnu/hTwVKnlRUDPVAWVTFcNu5JZs2aydu1aOnfuxGWDBpOXl8fIP9/E6jWrueyyX3HggQcyZvTYqEONlaG/voK1a9eSk1ONEcNHULdu3ahDiqU5c+YwceJE9t9/fwrPLgRg6NChnHD8CRFHFi85OTkMHz6CAQMvpaSkhMLCs2nbpm3UYUUmr2FtrhzVHYDs7Czeeum/fPjWYgaP7EKLAxvjDiu/XM+YG15LrN+gFsPHFFJS4qxesYE7r54SZfixoPfUzmKQTySNeYjrzczsFuBG4BtgMnAo8Gt3f6SibdWUE04mvalSLQ4ZvWSOc9vfEXUIaWP8h0OiDiFtZOdkVeqJ6tNPVyXtu7Z164aRnmTDNuV0cff1QHfgM6ANcFWqghIREZGqKXTn1+BvN+Apd1+nX60iIiLxkElfyWETkxfN7D8kmnJ+ZWaNgc0VbCMiIiKyW8J2fr066Geyzt2LzWwTcFZqQxMREZEwjMypMgk78mtt4DLg7qBob6BDqoISERGR3WBJnCIWtvPrA8AW4OhgeSmJq3REREREkiZsYtLa3W8BtgK4+yZikVeJiIiIWfKmqIXt/LrFzGoBDmBmrYFvUxaViIiIhJZJfUzCJibXkxhYrcDMHgWOAS5KVVAiIiJSNYW9KucVM5sDHEWiCecKd/8qpZGJiIhIOJlTYRK6xgSgJrAm2OYgM8Pdp1WwjYiIiKRYBuUl4RITM/sL0Bf4GCgJih1QYiIiIiJJE7bGpAdwgLurw6uIiEjMZNJtYsImJouAauhKHBERkfjJnLwkdGKyCfjAzF6jVHLi7penJCoREREJLYPyktCJycRgEhEREUmZsJcLj0t1ICIiIrJnqkwfEzObRzDa6664+6FJj0hERESqrIpqTLoHfwcFfx8O/v6cchIWERERkT1RbmLi7osBzOwUd/9pqYd+F4wEe3UqgxMREZGKZVBLTui7C5uZHVNq4ejd2FZERERSyMySNkUt7FU5/YH7zSyPxFVJa4BLUhaViI
iIVElhr8p5H2gfJCa4+7qURiUiIiJVUuib+JlZN+BgoOb2qh53/2OK4hIREZGQYtACkzSh+omY2T0kbuI3hERTTm9gvxTGJSIiIlVQ2A6sR7t7P2CNu/8B6Ajsn7qwREREJCxL4r+ohW3K2Rz83WRmewOrgWZhNszKiv5FpoOSEg0LE1YmVVlK9MZ/OCTqENLG5s1bow4hbeTWqVG5O8yg82LYxOQFM6sH/BWYQ2JwtbEpi0pERESqpLBNOf8Bit39aeD/gHeB51IWlYiIiIRmlryp4n3Z/Wa2wsw+KlXWwMxeMbMFwd/6QbmZ2SgzW2hmc83ssIqeP2xicq27f21mxwInAfcCd4fcVkRERFLIkjiF8CBw2vfKrgZec/e2wGt8NzL86UDbYBpIiNwhbGJSHPztBox195eA6iG3FRERkQzh7tNI9DUt7SxgXDA/DuhRqvwhT3gXqGdm5fZRDZuYLDWz0SQuGZ5kZjV2Y1sRERFJpSS25ZjZQDObXWoaGCKCfHdfFswvB/KD+X2AL0qttyQoK1PYzq99SFTb/M3d1wbZzlUhtxUREZEUSuZFOe4+BhjzI7Z3M9vjS03DDkm/CXim1PIyYFnZW4iIiEgVUmRmzdx9WVB5sSIoXwoUlFpv36CsTGqOERERSXOVeVVOGSYCFwbzFwLPlyrvF1ydcxSwrlSTzy6FvleOiIiIxFQljjxpZuOBE4FGZrYEuB64GXjSzPoDi0l0AQGYBHQFFgKbgIsrfH731I44WrxNQ5qGoZFfw9NowiLR0Miv4eXWqVGpJ6pVX21M2pdIw0a5kZ5kVWMiIiKS5jLp55oSExERkTSXSfcQU+dXERERiQ3VmIiIiKS9zKkyUWIiIiKS5tSUIyIiIpICSkxEREQkNtSUIyIikubUlCMiIiKSAqoxERERSXuZU2WixERERCTNqSlHREREJAWUmIiIiEhsqClHREQk3akpR0RERCT5VGMiIiKS5iyDqkxUYyIiIiKxUeUSk+nTp9O12+mcetqpjB07NupwYmXEtcM5/oRj6VF45o6yO+4YReHZPejZq5ABAy9lxYoVEUYYP8NHDOfY447hzLPOiDqU2NNnL5xvv/2Wvn37UFjYgzPO7M4dd94RdUixU1xczLnn9eHyKwYDMHPme5x3Xh969ynkuuuGs23btogjlB+jSiUmxcXF3HjTnxh9zxhemPgCkya9xMKFC6MOKzZ6nFXIPXeP2ans4osv4dlnnuPpCc9ywgkncPc9d0UUXTwV9ujBmNFjKl6xitNnL7zq1atz//0P8Oyzz/HM08/y1ltv8eGHH0QdVqyMH/8oLVu0BKCkpITrbxjByJG38NSTz9Ks2d68+OLEiCOsfGbJm6JWpRKTefPm0rygOQUFBVSvXp3Tu3Zl6utTow4rNjp06EBeXt5OZXXq1Nkx/80332BxeNfGSIcOR5CXVy/qMGJPn73wzIzc3FwAtm3bxrZtW+PxbRETRUXLmf7WNHr0OBuAdevWUi2nGvvt1wKAnx11FK9NfTXCCOXHCpWYmFm+md1nZi8HyweZWf/UhpZ8RUUraNqs6Y7lpvn5rCgqijCi9HD7qNvofPJJvPTSiwweNCTqcCQN6bO3e4qLiyk8u5BjjzuWozseTftD20cdUmz87e+3cMUVvyErK/H1Va9efbYVF/Pvf38MwGuvvkLR8uVRhig/UtgakweBKcDewfInwNCyVjazgWY228xmjx2rau50d8XlQ3nt1al069adx8Y/GnU4IhkvOzubZ595ltenvs68efNYsOCTqEOKhWnT3qRB/QYc9P8O2lFmZowceQt/+/stXNDvPGrn5pKVnR1hlBHJoLacsJcLN3L3J83sGgB332ZmxWWt7O5jgDEAxdtK/MeHmRz5+U1Yvuy7THp5URFN8vMjjCi9dO/WnV9d9kvVmshu02dvz9StW5cjjzyS6W+9Rdu2+0cdTuQ+/PAD3pz2Bm+9/RZbtnzLxg0bGT7iGm66cS
T33zcOgHfemcHnixdHHGnliz6dSJ6wNSYbzawh4ABmdhSwLmVRpUi7doew+PPFLFmyhC1btvDypEl06tQp6rBibfHiz3bMT506lZYtW0UXjKQtffbCW716NevXrwdg8+bNzHjnHVq1bBlxVPEwZMgVTH75VV56cTIj/3wLHY44kptuHMnq1asA2LJlCw+Ou5+ePXtHHKn8GGFrTH4DTARam9nbQGOgV8qiSpGcnByGDx/BgIGXUlJSQmHh2bRt0zbqsGLjqmFXMmvWTNauXUvnzp24bNBgpk+fxmef/Q+zLPbee2+uu/b6qMOMlSuv/C0zg2PW6aQTGTxoMD17pt1HI+X02Qtv5cqVXPP7aygpKaakpITTTj2NE09UEleecQ89yPTp03AvoVevPhx55M+iDqnyZVCVibmHa2kxsxzgABIv/7/uvjXMdnFqyomzEh2m0LKyMugTKJJGNm8OddoXILdOjUo9UW3a8G3SvkRqV3Ls3xf2qpzeQC13/xjoATxhZoelNDIRERGpcsL2MbnW3b82s2OBzsB9wN2pC0tERERCy6CrcsImJtuvwOkGjHX3l4DqqQlJREREqqqwiclSMxsN9AUmmVmN3dhWREREJJSwyUUfEgOsnerua4EGwFUpi0pERERCsyROUSv3cmEzq+vu64GawBtBWQPgW2B2yqMTERGRisUho0iSisYxeQzoDrxPYnC10i/dAY22JSIiEjHLoMyk3MTE3btb4nayJ7j755UUk4iIiFRRFfYx8cQIbC9VQiwiIiKyJzKok0nYzq9zzOyIlEYiIiIieySD8pLQ98r5GXC+mS0GNpKI3d390JRFJiIiIlVO2MTk1JRGISIiInsuDlUdSRIqMXH3xcG9cY4lcTXO2+4+J6WRiYiISEiZk5mEvYnfdcA4oCHQCHjAzEakMjARERGpeixx0U0FK5n9F2jv7puD5VrAB+5+QEXbFm8rSdqtmDNZiQ5TaFlZmfPLQCSdbN68NeoQ0kZunRqVeqLasnlb0r5EqtfMifQkG7aPyZckRn/dHCzXAJamJCIRERHZPRn0ey1sYrIO+NjMXiHRx+QUYKaZjQJw98tTFJ+IiIhUIWETk2eDabs3kh+KiIiI7IkMqjCpODExs2ygi7ufXwnxiIiIyO6yzElNwgxJXwzsZ2bVKyEeERERqcLCNuUsAt42s4kkRn4FwN3/kZKoREREpEoKm5h8GkxZwF6pC0dERER2Vwa15IQbx+TH0Dgm4Wgck/A0jolINDSOSXiVPY7Jtq3FSfsSyamWHf9xTMzsdRKXCe/E3U9KekQiIiJSZYVtyrmy1HxNoCewLfnhiIiIyO6ySmzLMbPTgNuBbOBed785qc+/p005ZjbT3Y+saD015YSjppzw1JQjEg015YRX2U05yfyuzc4p+yQbDCHyCYmBVpcAs4Bz3f3fydp/2KacBqUWs4AOQF6yghAREZG0cCSw0N0XAZjZ48BZQOUmJsD7JPqYGLAV+AzoH2bD8jKvqJjZQHcfE3UcpWVHHUAZ4nis4kjHKTwdq3DieJxy69SIOoQfiONxikIyv2vNbCAwsFTRmFLHeB/gi1KPLQF+lqx9Q4gB1gK/A37i7i2Bh0mMZbIpmYFUsoEVryIBHatwdJzC07EKR8cpHB2nJHP3Me7eodRUqYlf2MRkhLuvN7NjgZOAe4G7UxeWiIiIxNBSoKDU8r5BWdKETUyKg7/dgLHu/hKgIepFRESqlllAWzNrGdyq5hxgYjJ3EDYxWWpmo4G+wCQzq7Eb28ZRlW+P3A06VuHoOIWnYxWOjlM4Ok6VyN23AYOBKcB84El3/ziZ+wh1ubCZ1QZOA+a5+wIzawYc4u7/TGYwIiIiUrWlfEh6ERERkbDSuTlGREREMowSkyrMzC43s/lm9mjUsaQTM5sRdQxxZGYboo4hHZlZCzP7KOo4MpGZTTKzelHHIbtHTTkhWeJGBObuJVHHkixm9h/gZHdf8iOeIyfoDCVVnJ
ltcPc6UceRbsysBfCiu7eLOJTYC3u+ycTzdVWS9jUmZvacmb1vZh8Ho9VhZhvM7CYz+9DM3jWz/KC8dbA8z8xuLP0Lz8yuMrNZZjbXzP4QlLUws/+a2UPAR+x87XZaM7N7gFbAy2Y23MzuN7OZZvYvMzsrWKeFmU03sznBdHRQfmJQPpEkDkOcLoL3l5nZX83so+D91Dd47CEz61Fq3Ue3H8+qopxj87iZdSu13oNm1svMsoP1t3/+fhFd9HvOzHLN7KXgvPORmfU1s+uC1/WRmY0JvjAxs8OD9T4EBpV6jovM7Bkzm2xmC8zsllKPdTGzd4LP4lNmVicov9nM/h0cu78FZb2DfX5oZtMq+VBUqIxj9ZmZNQoe72BmbwTzN5jZw2b2NvBwcIyeN7M3gmN0fbDeD87X259zV/sLtjnczN4MvkOmWOLCDomau6f1BDQI/tYi8WZsSGL4/DOC8ltIDBAH8CKJmw0B/BLYEMx3IXHJmZFI1l4EjgdaACXAUVG/zhQdu8+ARsCfgZ8HZfVI3KApF6gN1AzK2wKzg/kTSYz+2zLq1xDRcdtA4g7br5C4m0A+8DnQDDgBeC5YLw/4H5ATdcyVdVyCv2Udm0JgXLBOdRLDWtciMXLn9s9oDWB2Or63gtc9ttRy3vbzU7D8cKnz0lzg+GD+r8BHwfxFwKJg25rAYhI/iBoB04DcYL3fAdcF57v/8l3td73g7zxgn9JlcZrKOFafAY2C5Q7AG8H8DSRui1Kr1DFaFrz27ef9Drs6X5c6x+1qf9WAGUDjoKwvcH/Ux0aTp3+NCXB58KvjXRIf4LbAFhLJBSTe0C2C+Y7AU8H8Y6Weo0sw/QuYAxwYPA/AYnd/N1XBx0QX4Goz+wB4g8QJsTmJD+5YM5tH4rgdVGqbme7+v8oONEaOBca7e7G7FwFvAke4+5skBh9qDJwLPO1Vr6lrl8cGeBnoZIlxkE4Hprn7NyTef/2C9997JL5w2u76qWNtHnCKmf3FzI5z93UkXu97wWfoJOBgS/R5qOfu22syHv7e87zm7uvcfTOJGsn9gKNIfP7eDo7ThUH5OmAzcJ+Znc13twp5G3jQzAYQz1tx7epYlWdi8F7Z7hV3XxWUPUPiPQdln693tb8DgHbAK8ExHUFiFFOJWNib+MWSmZ0InAx0dPdNQdVfTWCrBykwiVFrK3qdBox099Hfe/4WJGoGMp0BPd39vzsVmt0AFAHtSdQkbS71cFU4LnvqIeDnJEZEvDjiWGLD3TcHn9FTSfw6fTx4yIAh7j4lqtiSwd0/MbPDgK7AjWb2Golmmg7u/kXweaoZ4qm+LTW//fxlJL6Mz/3+ymZ2JNAZ6EVi4KuT3P2XZvYzEqN1v29mh7v7qh/x8pKqjGO1je+6F3z/OH3/fPP9zpFexnrl7e9Z4GN377iHL0NSJN1rTPKANUFSciCJXxXleZdElR4kvjS2mwJcUqrNdh8za5L0aONrCjCkVPv3T4PyPGCZJzqQXUA8f3lFZTrQN+gf0ZhE09/M4LEHgaEA7l7l+uBQ/rF5gkSydhwwOSibAvzKzKoBmNn+ZpZbyTH/aGa2N7DJ3R8h0TxzWPDQV8G5pReAu68F1lri3mMA54d4+neBY8ysTbCv3OA41QHy3H0S8GsSPyIws9bu/p67XwesJGb948o4Vp8Bhwer9Cxj0+1OMbMGZlYL6EGihmh39/dfoLGZdQzWqWZmB+/hS5IkSusaExIntl+a2XwSb7KKmlyGAo+Y2fBg23UA7v5PM/t/wDvBd/MGEr94i8t6ogzzJ+A2YK6ZZZHoF9EduAt42sz6kTheqiVJcBK/tjoCHwbLw9x9OYC7FwXvyeeiCzFSZR4b4J8kmi6ed/ctQdm9JJpb/397d4wSMRAFYPh/ndpYeAArSxv1CB7BwlLtBCuvYLmCIrZaLVhZeADBWmQrDyFsI1gICj6LGTsJG1Q2cf+vTAYymRTz8l5mMqrB8Zgy2fTNKjCIiA/gHd
in3Mcj8ET5x8iXXeAyIpIyJo0ycxwRO8BVLYVBKT28ADcRMUfJqhzWc4OIzim/ugAAALdJREFUWKnHbinPoku+G6t5SknqiFJSbnIPXFNKL8PMfKgZ7omvl5lvEbEFnEXEImU+PAV+dXt1tTdTy4WjbK3/mpkZEduUD2FnasWEfiYiloBRZi43tFmg1LTXJqidS2qhBmgbmXkw7b7ob/Q9Y9LWOnBe38qegb0p90c9UtPBd8BxQ5tN4AI4MSiRpPZmKmMiSZK6re8fv0qSpH/EwESSJHWGgYkkSeoMAxNJktQZBiaSJKkzPgFrstqlMbHCggAAAABJRU5ErkJggg==\\n\",\n            \"text/plain\": [\n              \"<Figure size 720x504 with 2 Axes>\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": [],\n            \"needs_background\": \"light\"\n          }\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"QKh_bJxtlhkW\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"From the above plot we can see that the most confused classes are 'joy' and 'love' which seems obivous as these two emotions are really close. We can say the same thing 'surprise' and 'anger' as well. So our model is doing pretty well.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"16TiclmeX1xE\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"vZ-YLmJyg64T\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"## SWAG\\n\",\n        \"\\n\",\n        \"Now lets try a more challenging task and see how it performs.\\n\",\n        \"\\n\",\n        \"SWAG is a natural language inference and commonsense reasoning task proposed in this [paper](https://arxiv.org/pdf/1808.05326.pdf).\\n\",\n        \"\\n\",\n        \"The basic task is that  a model is\\n\",\n        \"given a context **c = (s, n)**: a complete sentence\\n\",\n        \"**s** and a noun phrase **n** that begins a second sentence, as well as a list of 
possible verb phrase sentence endings **V**. The model must then\\n\",\n        \"select the most appropriate verb phrase **v** in **V**. For example\\n\",\n        \"\\n\",\n        \"On stage, a woman takes a seat at the piano. She\\n\",\n        \"\\n\",\n        \"a) sits on a bench as her sister plays with the doll.\\n\",\n        \"\\n\",\n        \"b) smiles with someone as the music plays.\\n\",\n        \"\\n\",\n        \"c) is in the crowd, watching the dancers.\\n\",\n        \"\\n\",\n        \"**d) nervously sets her fingers on the keys.**\\n\",\n        \"\\n\",\n        \"The correct answer is bolded. Given the above example the model should select **nervously sets her fingers on the keys** as the most appropriate verb phrase\\n\",\n        \"\\n\",\n        \"To frame this task in text-2-text setting the example is processed as below.\\n\",\n        \"\\n\",\n        \"context: context_text options: 1: option_1 2: option_2 3: option_3 4: option_4\\n\",\n        \"\\n\",\n        \"and if the actual label is 1 then the model is asked to predict the text '1'. Here's how the above example will be processed\\n\",\n        \"\\n\",\n        \"**Input**\\n\",\n        \"\\n\",\n        \"context: On stage, a woman takes a seat at the piano. She  options: 1: sits on a bench as her sister plays with the doll. 2: smiles with someone as the music plays. 3: is in the crowd, watching the dancers. 
4: nervously sets her fingers on the keys.\\n\",\n        \"\\n\",\n        \"**Target**\\n\",\n        \"\\n\",\n        \"4\\n\",\n        \"\\n\",\n        \"This is just one possible way to process these examples, there are various other ways we can formulate this problem in text-2-text setting but that's for later.\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"hOxk-ZoJmamm\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"### Dataset\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"yeHfgOhThLPj\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"import csv\\n\",\n        \"from dataclasses import dataclass\\n\",\n        \"\\n\",\n        \"from enum import Enum\\n\",\n        \"from typing import List, Optional\\n\",\n        \"from transformers import PreTrainedTokenizer\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"3DulV7U5hik7\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"880c611b-d11c-4620-9d75-0bcfa423c1ff\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 386\n        }\n      },\n      \"source\": [\n        \"!wget https://raw.githubusercontent.com/rowanz/swagaf/master/data/train.csv\\n\",\n        \"!wget https://raw.githubusercontent.com/rowanz/swagaf/master/data/val.csv\\n\",\n        \"\\n\",\n        \"!mkdir swag_data\\n\",\n        \"!mv *.csv swag_data\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"--2020-05-09 15:06:34--  https://raw.githubusercontent.com/rowanz/swagaf/master/data/train.csv\\n\",\n            \"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 
151.101.0.133, 151.101.64.133, 151.101.128.133, ...\\n\",\n            \"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\\n\",\n            \"HTTP request sent, awaiting response... 200 OK\\n\",\n            \"Length: 28243333 (27M) [text/plain]\\n\",\n            \"Saving to: ‘train.csv’\\n\",\n            \"\\n\",\n            \"train.csv           100%[===================>]  26.93M  35.9MB/s    in 0.8s    \\n\",\n            \"\\n\",\n            \"2020-05-09 15:06:35 (35.9 MB/s) - ‘train.csv’ saved [28243333/28243333]\\n\",\n            \"\\n\",\n            \"--2020-05-09 15:06:38--  https://raw.githubusercontent.com/rowanz/swagaf/master/data/val.csv\\n\",\n            \"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\\n\",\n            \"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\\n\",\n            \"HTTP request sent, awaiting response... 
200 OK\\n\",\n            \"Length: 7893588 (7.5M) [text/plain]\\n\",\n            \"Saving to: ‘val.csv’\\n\",\n            \"\\n\",\n            \"val.csv             100%[===================>]   7.53M  17.5MB/s    in 0.4s    \\n\",\n            \"\\n\",\n            \"2020-05-09 15:06:39 (17.5 MB/s) - ‘val.csv’ saved [7893588/7893588]\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"Tllm6irZg8IO\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"# below code is adapted from https://github.com/huggingface/transformers/blob/master/examples/multiple-choice/utils_multiple_choice.py\\n\",\n        \"\\n\",\n        \"@dataclass(frozen=True)\\n\",\n        \"class InputExample:\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"    A single training/test example for multiple choice\\n\",\n        \"    Args:\\n\",\n        \"        example_id: Unique id for the example.\\n\",\n        \"        context: str. The untokenized text of the context shared by all the endings.\\n\",\n        \"        endings: list of str. The multiple choice's options (candidate sentence endings).\\n\",\n        \"        label: (Optional) string. The label of the example. 
This should be\\n\",\n        \"        specified for train and dev examples, but not for test examples.\\n\",\n        \"    \\\"\\\"\\\"\\n\",\n        \"\\n\",\n        \"    example_id: str\\n\",\n        \"    context: str\\n\",\n        \"    endings: List[str]\\n\",\n        \"    label: Optional[str]\\n\",\n        \"\\n\",\n        \"class Split(Enum):\\n\",\n        \"    train = \\\"train\\\"\\n\",\n        \"    dev = \\\"dev\\\"\\n\",\n        \"    test = \\\"test\\\"\\n\",\n        \"\\n\",\n        \"class DataProcessor:\\n\",\n        \"    \\\"\\\"\\\"Base class for data converters for multiple choice data sets.\\\"\\\"\\\"\\n\",\n        \"\\n\",\n        \"    def get_train_examples(self, data_dir):\\n\",\n        \"        \\\"\\\"\\\"Gets a collection of `InputExample`s for the train set.\\\"\\\"\\\"\\n\",\n        \"        raise NotImplementedError()\\n\",\n        \"\\n\",\n        \"    def get_dev_examples(self, data_dir):\\n\",\n        \"        \\\"\\\"\\\"Gets a collection of `InputExample`s for the dev set.\\\"\\\"\\\"\\n\",\n        \"        raise NotImplementedError()\\n\",\n        \"\\n\",\n        \"    def get_test_examples(self, data_dir):\\n\",\n        \"        \\\"\\\"\\\"Gets a collection of `InputExample`s for the test set.\\\"\\\"\\\"\\n\",\n        \"        raise NotImplementedError()\\n\",\n        \"\\n\",\n        \"    def get_labels(self):\\n\",\n        \"        \\\"\\\"\\\"Gets the list of labels for this data set.\\\"\\\"\\\"\\n\",\n        \"        raise NotImplementedError()\\n\",\n        \"\\n\",\n        \"class SwagProcessor(DataProcessor):\\n\",\n        \"    \\\"\\\"\\\"Processor for the SWAG data set.\\\"\\\"\\\"\\n\",\n        \"\\n\",\n        \"    def get_train_examples(self, data_dir):\\n\",\n        \"        \\\"\\\"\\\"See base class.\\\"\\\"\\\"\\n\",\n        \"        logger.info(\\\"LOOKING AT {} train\\\".format(data_dir))\\n\",\n        \"        return 
self._create_examples(self._read_csv(os.path.join(data_dir, \\\"train.csv\\\")), \\\"train\\\")\\n\",\n        \"\\n\",\n        \"    def get_dev_examples(self, data_dir):\\n\",\n        \"        \\\"\\\"\\\"See base class.\\\"\\\"\\\"\\n\",\n        \"        logger.info(\\\"LOOKING AT {} dev\\\".format(data_dir))\\n\",\n        \"        return self._create_examples(self._read_csv(os.path.join(data_dir, \\\"val.csv\\\")), \\\"dev\\\")\\n\",\n        \"\\n\",\n        \"    def get_test_examples(self, data_dir):\\n\",\n        \"        \\\"\\\"\\\"See base class.\\\"\\\"\\\"\\n\",\n        \"        logger.info(\\\"LOOKING AT {} dev\\\".format(data_dir))\\n\",\n        \"        raise ValueError(\\n\",\n        \"            \\\"For swag testing, the input file does not contain a label column. It can not be tested in current code\\\"\\n\",\n        \"            \\\"setting!\\\"\\n\",\n        \"        )\\n\",\n        \"        return self._create_examples(self._read_csv(os.path.join(data_dir, \\\"test.csv\\\")), \\\"test\\\")\\n\",\n        \"\\n\",\n        \"    def get_labels(self):\\n\",\n        \"        \\\"\\\"\\\"See base class.\\\"\\\"\\\"\\n\",\n        \"        return [\\\"0\\\", \\\"1\\\", \\\"2\\\", \\\"3\\\"]\\n\",\n        \"\\n\",\n        \"    def _read_csv(self, input_file):\\n\",\n        \"        with open(input_file, \\\"r\\\", encoding=\\\"utf-8\\\") as f:\\n\",\n        \"            return list(csv.reader(f))\\n\",\n        \"\\n\",\n        \"    def _create_examples(self, lines: List[List[str]], type: str):\\n\",\n        \"        \\\"\\\"\\\"Creates examples for the training and dev sets.\\\"\\\"\\\"\\n\",\n        \"        if type == \\\"train\\\" and lines[0][-1] != \\\"label\\\":\\n\",\n        \"            raise ValueError(\\\"For training, the input file must contain a label column.\\\")\\n\",\n        \"\\n\",\n        \"        examples = [\\n\",\n        \"            InputExample(\\n\",\n        \"                
example_id=line[2],\\n\",\n        \"                # common beginning of each\\n\",\n        \"                # choice is stored in \\\"sent2\\\".\\n\",\n        \"                context=line[3],\\n\",\n        \"                endings=[line[7], line[8], line[9], line[10]],\\n\",\n        \"                label=line[11],\\n\",\n        \"            )\\n\",\n        \"            for line in lines[1:]  # we skip the line with the column names\\n\",\n        \"        ]\\n\",\n        \"\\n\",\n        \"        return examples\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"-OXxGvqZjC9L\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"class SwagDataset(Dataset):\\n\",\n        \"  def __init__(self, tokenizer, data_dir, type_path,  max_len=512):\\n\",\n        \"    self.data_dir = data_dir\\n\",\n        \"    self.type_path = type_path\\n\",\n        \"    self.max_len = max_len\\n\",\n        \"    self.tokenizer = tokenizer\\n\",\n        \"    self.inputs = []\\n\",\n        \"    self.targets = []\\n\",\n        \"\\n\",\n        \"    self.proc = SwagProcessor()\\n\",\n        \"\\n\",\n        \"    self._build()\\n\",\n        \"  \\n\",\n        \"  def __getitem__(self, index):\\n\",\n        \"    source_ids = self.inputs[index][\\\"input_ids\\\"].squeeze()\\n\",\n        \"    target_ids = self.targets[index][\\\"input_ids\\\"].squeeze()\\n\",\n        \"\\n\",\n        \"    src_mask    = self.inputs[index][\\\"attention_mask\\\"].squeeze()  # might need to squeeze\\n\",\n        \"    target_mask = self.targets[index][\\\"attention_mask\\\"].squeeze()  # might need to squeeze\\n\",\n        \"\\n\",\n        \"    return {\\\"source_ids\\\": source_ids, \\\"source_mask\\\": src_mask, \\\"target_ids\\\": target_ids, \\\"target_mask\\\": target_mask}\\n\",\n        \"  \\n\",\n        \"  
def __len__(self):\\n\",\n        \"    return len(self.inputs)\\n\",\n        \"  \\n\",\n        \"  def _build(self):\\n\",\n        \"    if self.type_path == 'train':\\n\",\n        \"      examples = self.proc.get_train_examples(self.data_dir)\\n\",\n        \"    else:\\n\",\n        \"      examples = self.proc.get_dev_examples(self.data_dir)\\n\",\n        \"    \\n\",\n        \"    for example in examples:\\n\",\n        \"      self._create_features(example)\\n\",\n        \"  \\n\",\n        \"  def _create_features(self, example):\\n\",\n        \"    input_ = example.context\\n\",\n        \"    options = ['%s: %s' % (i, option) for i, option in zip('1234', example.endings)]\\n\",\n        \"    options = \\\" \\\".join(options)\\n\",\n        \"    input_ = \\\"context: %s  options: %s </s>\\\" % (input_, options)\\n\",\n        \"    target = \\\"%s </s>\\\" % str(int(example.label) + 1)\\n\",\n        \"\\n\",\n        \"    # tokenize inputs\\n\",\n        \"    tokenized_inputs = self.tokenizer.batch_encode_plus(\\n\",\n        \"        [input_], max_length=self.max_len, pad_to_max_length=True, return_tensors=\\\"pt\\\"\\n\",\n        \"    )\\n\",\n        \"    # tokenize targets\\n\",\n        \"    tokenized_targets = self.tokenizer.batch_encode_plus(\\n\",\n        \"        [target], max_length=2, pad_to_max_length=True, return_tensors=\\\"pt\\\"\\n\",\n        \"    )\\n\",\n        \"\\n\",\n        \"    self.inputs.append(tokenized_inputs)\\n\",\n        \"    self.targets.append(tokenized_targets)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"oKqFMTku3sDC\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"97ce9f8a-4b75-4d95-ba04-fae101f8db82\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 186,\n          \"referenced_widgets\": [\n            
\"78b1b91a08214461b74fb1e143247d1e\",\n            \"902a509471004d2691d807c4990fccd2\",\n            \"74ec15497e1743a4af6be12e3bc1487d\",\n            \"a70b457d9379403f9fac247de68bb8e3\",\n            \"28f9d9aa0ece4831b0f9e412d8a88f8d\",\n            \"7640680e1006492da75d873726567fed\",\n            \"1090e3e017564a2281c60fb53a901c75\",\n            \"9df2679ba627444e9b76bd2ff0ddc657\"\n          ]\n        }\n      },\n      \"source\": [\n        \"tokenizer = T5Tokenizer.from_pretrained('t5-base')\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:filelock:Lock 140245777042344 acquired on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\\n\",\n            \"INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpv2ybakmg\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"78b1b91a08214461b74fb1e143247d1e\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model in cache at 
/root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\",\n            \"INFO:filelock:Lock 140245777042344 released on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock\\n\",\n            \"INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"PIUiU7zSpbb3\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"328b5f15-fe96-43ce-99e9-5d4233a7e97a\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 50\n        }\n      },\n      \"source\": [\n        \"dataset = SwagDataset(tokenizer, data_dir='swag_data', type_path='val')\\n\",\n        \"len(dataset)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:__main__:LOOKING AT swag_data dev\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            
\"text/plain\": [\n              \"20006\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 14\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"zxXGbCzB37HG\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"8fbda79c-7be7-4d5f-8d5f-7b986a1c374b\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 70\n        }\n      },\n      \"source\": [\n        \"data = dataset[69]\\n\",\n        \"print(tokenizer.decode(data['source_ids']))\\n\",\n        \"print(tokenizer.decode(data['target_ids']))\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"context: A little girl plays softly the drums holding two sticks while she is singing on a microphone. The, the girl options: 1: take in the greeting and an asian girl followed by two people standing on stage. 2: holds the microphone up and begins to girl dance an entire time. 3: claps the girls hands anxiously. 
4: plays more fast the drums.\\n\",\n            \"4\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"aVfmE4O3Ku7H\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"### Train\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"DDPxWUY86llx\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"!mkdir -p t5_swag\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"outputId\": \"fe4e58ab-6916-45f9-f742-797d87ad1ef4\",\n        \"id\": \"PrWtMjcj6lmA\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 54\n        }\n      },\n      \"source\": [\n        \"args_dict.update({'data_dir': 'swag_data', 'output_dir': 't5_swag', 'num_train_epochs': 3})\\n\",\n        \"args = argparse.Namespace(**args_dict)\\n\",\n        \"print(args_dict)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"{'data_dir': 'swag_data', 'output_dir': 't5_swag', 'model_name_or_path': 't5-base', 'tokenizer_name_or_path': 't5-base', 'max_seq_length': 512, 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_epsilon': 1e-08, 'warmup_steps': 0, 'train_batch_size': 8, 'eval_batch_size': 8, 'num_train_epochs': 3, 'gradient_accumulation_steps': 16, 'n_gpu': 1, 'early_stop_callback': False, 'fp_16': False, 'opt_level': 'O1', 'max_grad_norm': 1.0, 'seed': 42}\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"2Ojz3THj6lmK\",\n        \"colab\": {}\n    
  },\n      \"source\": [\n        \"checkpoint_callback = pl.callbacks.ModelCheckpoint(\\n\",\n        \"    filepath=args.output_dir, prefix=\\\"checkpoint\\\", monitor=\\\"val_loss\\\", mode=\\\"min\\\", save_top_k=5\\n\",\n        \")\\n\",\n        \"\\n\",\n        \"train_params = dict(\\n\",\n        \"    accumulate_grad_batches=args.gradient_accumulation_steps,\\n\",\n        \"    gpus=args.n_gpu,\\n\",\n        \"    max_epochs=args.num_train_epochs,\\n\",\n        \"    early_stop_callback=False,\\n\",\n        \"    precision= 16 if args.fp_16 else 32,\\n\",\n        \"    amp_level=args.opt_level,\\n\",\n        \"    gradient_clip_val=args.max_grad_norm,\\n\",\n        \"    checkpoint_callback=checkpoint_callback,\\n\",\n        \"    callbacks=[LoggingCallback()],\\n\",\n        \")\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"Kk0x0Nql6lmQ\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"def get_dataset(tokenizer, type_path, args):\\n\",\n        \"  return SwagDataset(tokenizer=tokenizer, data_dir=args.data_dir, type_path=type_path,  max_len=args.max_seq_length)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"outputId\": \"94aa8d13-9d11-4fa9-979f-e3bbf15bb639\",\n        \"id\": \"XDFGzzpQ6lmU\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 1000,\n          \"referenced_widgets\": [\n            \"5c7427d7db844b9691d30cf2de1efc17\",\n            \"bb0df1833ee3489da5c2a9c7b1306cc6\",\n            \"3d2817812b6f475a8c838fd14646469a\",\n            \"9d0f0c946790477fb8bc8bac64dfd7de\",\n            \"8254b8062d5e4280bea46f8bc444c5db\",\n            \"ab5f07ab5c574148a0062eb7f1ce5bcd\",\n            
\"47fdc2009efc443392ecd182996fcca9\",\n            \"9b705e83fea84cbf912e33d6342be721\",\n            \"e8e8ea6199df43019930ac7b557c46a5\",\n            \"0566f29b017f47f399d7579d7929e046\",\n            \"932309f0a40b46659c0cac7cc37fdc05\",\n            \"da3665141bd44a24a5b5c9f36d4a9c52\",\n            \"5c98e3a5b6a6403a936a725f4c30cdd3\",\n            \"8da2b560fa9348098a2a7f09967d5f5f\",\n            \"7e37cac227014717987922341f8099fe\",\n            \"b95f98f98a76434591f90d41b43e39ba\"\n          ]\n        }\n      },\n      \"source\": [\n        \"model = T5FineTuner(args)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:filelock:Lock 140242832534944 acquired on /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b.lock\\n\",\n            \"INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpwv74k3ig\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"5c7427d7db844b9691d30cf2de1efc17\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1199.0, style=ProgressStyle(description…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:transformers.file_utils:storing 
https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json in cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"INFO:filelock:Lock 140242832534944 released on /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b.lock\\n\",\n            \"INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json from cache at /root/.cache/torch/transformers/40578967d1f029acb6162b36db9d8b4307063e885990ccd297c2c5be1cf1b3d7.2995d650f5eba18c8baa4146e210d32d56165e90d374281741fc78b872cd6c9b\\n\",\n            \"INFO:transformers.configuration_utils:Model config T5Config {\\n\",\n            \"  \\\"architectures\\\": [\\n\",\n            \"    \\\"T5WithLMHeadModel\\\"\\n\",\n            \"  ],\\n\",\n            \"  \\\"d_ff\\\": 3072,\\n\",\n            \"  \\\"d_kv\\\": 64,\\n\",\n            \"  \\\"d_model\\\": 768,\\n\",\n            \"  \\\"decoder_start_token_id\\\": 0,\\n\",\n            \"  \\\"dropout_rate\\\": 0.1,\\n\",\n            \"  \\\"eos_token_id\\\": 1,\\n\",\n            \"  \\\"initializer_factor\\\": 1.0,\\n\",\n            \"  \\\"is_encoder_decoder\\\": true,\\n\",\n            \"  \\\"layer_norm_epsilon\\\": 1e-06,\\n\",\n            \"  \\\"model_type\\\": \\\"t5\\\",\\n\",\n            \"  \\\"n_positions\\\": 512,\\n\",\n            \"  \\\"num_heads\\\": 12,\\n\",\n            \"  \\\"num_layers\\\": 12,\\n\",\n            \"  \\\"output_past\\\": true,\\n\",\n            \"  
\\\"pad_token_id\\\": 0,\\n\",\n            \"  \\\"relative_attention_num_buckets\\\": 32,\\n\",\n            \"  \\\"task_specific_params\\\": {\\n\",\n            \"    \\\"summarization\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"length_penalty\\\": 2.0,\\n\",\n            \"      \\\"max_length\\\": 200,\\n\",\n            \"      \\\"min_length\\\": 30,\\n\",\n            \"      \\\"no_repeat_ngram_size\\\": 3,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"summarize: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_de\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to German: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_fr\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to French: \\\"\\n\",\n            \"    },\\n\",\n            \"    \\\"translation_en_to_ro\\\": {\\n\",\n            \"      \\\"early_stopping\\\": true,\\n\",\n            \"      \\\"max_length\\\": 300,\\n\",\n            \"      \\\"num_beams\\\": 4,\\n\",\n            \"      \\\"prefix\\\": \\\"translate English to Romanian: \\\"\\n\",\n            \"    }\\n\",\n            \"  },\\n\",\n            \"  \\\"vocab_size\\\": 32128\\n\",\n            \"}\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n   
         \"INFO:filelock:Lock 140242971659568 acquired on /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa.lock\\n\",\n            \"INFO:transformers.file_utils:https://cdn.huggingface.co/t5-base-pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp5pcfx_u3\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"e8e8ea6199df43019930ac7b557c46a5\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=891691430.0, style=ProgressStyle(descri…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:transformers.file_utils:storing https://cdn.huggingface.co/t5-base-pytorch_model.bin in cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\",\n            \"INFO:filelock:Lock 140242971659568 released on /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa.lock\\n\",\n            \"INFO:transformers.modeling_utils:loading weights file 
https://cdn.huggingface.co/t5-base-pytorch_model.bin from cache at /root/.cache/torch/transformers/f6f2fde9fa7611f4eff74620de9cbe734e7a717b5b143bd283cae4c2d6022990.54f906ff53bd09195cfc183a29cadc81b7705f07fcdb796d24163cb632b6bdfa\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:transformers.modeling_utils:Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']\\n\",\n            \"INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"outputId\": \"57300f1a-14a8-4e26-8dac-9238e34741c0\",\n        \"id\": \"1sQVILFo63Eb\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 50\n        }\n      },\n      \"source\": [\n        \"trainer = pl.Trainer(**train_params)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:lightning:GPU available: True, used: True\\n\",\n            \"INFO:lightning:CUDA_VISIBLE_DEVICES: [0]\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"STkqK5nC64YP\",\n        \"colab_type\": \"code\",\n        
\"outputId\": \"cb613d72-009f-44eb-acd8-b9c3dd44b0cb\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 1000,\n          \"referenced_widgets\": [\n            \"8e79d03deee94b299431330441bd64c8\",\n            \"510043ffee634f86b89ec3fc060a74ea\",\n            \"e86c5fbd48ce4215a0df353122183982\",\n            \"bfc3a5a3cf2e49868053db6f1ef7785d\",\n            \"361a2f79ed89495894d0b09a709f8f32\",\n            \"f7e53d55f0234627a3b9f2c90eb8682f\",\n            \"3584c01b0c5e47dfa373bae29461e94a\",\n            \"cfd9db6f31474a8189e741bf8fdad6a9\",\n            \"68705cee3df5458fb5145046337d925c\",\n            \"4cf1613d58bd450780ac95c994686985\",\n            \"3ee5f7cf56394175900ebb14ae0b5f9e\",\n            \"9f054dcf926c45459b7aa728493571a0\",\n            \"b52599dda9d94c83891d1c42c5f557e0\",\n            \"a1cf907a3bcc4177b1d5dd9edbf30c20\",\n            \"82b29ceeb21c417782e9e29a81eb47ea\",\n            \"886260804ffd4e11bc93fb6e098111ab\",\n            \"69f6eb1cb0434128961b5d83529813c5\",\n            \"6723d50588a248d0ad7bb118de8c3fd5\",\n            \"86d71b8233c14252a897ffa29ea6d9df\",\n            \"d01c708e22ab423896271fa79860e7c3\",\n            \"0e8da5995754472fac5fba1f8b30d107\",\n            \"3dbee77f299f4e14a1698b60d609b8a1\",\n            \"8c4c9025aaae44148591ae6f8bb37347\",\n            \"29e2f2f0914e4dea8117844675b42be5\",\n            \"0cfc8fa73f164b4fa5ddcbc3f115ef9b\",\n            \"4559bd35b33f4804b968debaaf316463\",\n            \"e403cc7718bf48f1b95150482e083f02\",\n            \"f6248a9db7f2466a9ab3a4fbd214f265\",\n            \"475e5353d31147d3ab156c0e7835684c\",\n            \"c3f65d683c6e4fe18e31ecc305f8d455\",\n            \"9b50abad66b44022aa389bc3f312db6b\",\n            \"762b2941ff3e47d89b6e6ce4350bc058\"\n          ]\n        }\n      },\n      \"source\": [\n        \"trainer.fit(model)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n         
 \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:lightning:\\n\",\n            \"    | Name                                                                  | Type                       | Params\\n\",\n            \"-----------------------------------------------------------------------------------------------------------------\\n\",\n            \"0   | model                                                                 | T5ForConditionalGeneration | 222 M \\n\",\n            \"1   | model.shared                                                          | Embedding                  | 24 M  \\n\",\n            \"2   | model.encoder                                                         | T5Stack                    | 109 M \\n\",\n            \"3   | model.encoder.block                                                   | ModuleList                 | 84 M  \\n\",\n            \"4   | model.encoder.block.0                                                 | T5Block                    | 7 M   \\n\",\n            \"5   | model.encoder.block.0.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"6   | model.encoder.block.0.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"7   | model.encoder.block.0.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"8   | model.encoder.block.0.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"9   | model.encoder.block.0.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"10  | model.encoder.block.0.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"11  | model.encoder.block.0.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"12  
| model.encoder.block.0.layer.0.SelfAttention.relative_attention_bias   | Embedding                  | 384   \\n\",\n            \"13  | model.encoder.block.0.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"14  | model.encoder.block.0.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"15  | model.encoder.block.0.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"16  | model.encoder.block.0.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"17  | model.encoder.block.0.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"18  | model.encoder.block.0.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"19  | model.encoder.block.0.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"20  | model.encoder.block.0.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"21  | model.encoder.block.0.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"22  | model.encoder.block.1                                                 | T5Block                    | 7 M   \\n\",\n            \"23  | model.encoder.block.1.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"24  | model.encoder.block.1.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"25  | model.encoder.block.1.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"26  | model.encoder.block.1.layer.0.SelfAttention.q                         | Linear                     | 589 K 
\\n\",\n            \"27  | model.encoder.block.1.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"28  | model.encoder.block.1.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"29  | model.encoder.block.1.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"30  | model.encoder.block.1.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"31  | model.encoder.block.1.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"32  | model.encoder.block.1.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"33  | model.encoder.block.1.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"34  | model.encoder.block.1.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"35  | model.encoder.block.1.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"36  | model.encoder.block.1.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"37  | model.encoder.block.1.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"38  | model.encoder.block.1.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"39  | model.encoder.block.2                                                 | T5Block                    | 7 M   \\n\",\n            \"40  | model.encoder.block.2.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"41  | model.encoder.block.2.layer.0                                         | 
T5LayerSelfAttention       | 2 M   \\n\",\n            \"42  | model.encoder.block.2.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"43  | model.encoder.block.2.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"44  | model.encoder.block.2.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"45  | model.encoder.block.2.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"46  | model.encoder.block.2.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"47  | model.encoder.block.2.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"48  | model.encoder.block.2.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"49  | model.encoder.block.2.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"50  | model.encoder.block.2.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"51  | model.encoder.block.2.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"52  | model.encoder.block.2.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"53  | model.encoder.block.2.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"54  | model.encoder.block.2.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"55  | model.encoder.block.2.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"56  | model.encoder.block.3                          
                       | T5Block                    | 7 M   \\n\",\n            \"57  | model.encoder.block.3.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"58  | model.encoder.block.3.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"59  | model.encoder.block.3.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"60  | model.encoder.block.3.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"61  | model.encoder.block.3.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"62  | model.encoder.block.3.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"63  | model.encoder.block.3.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"64  | model.encoder.block.3.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"65  | model.encoder.block.3.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"66  | model.encoder.block.3.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"67  | model.encoder.block.3.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"68  | model.encoder.block.3.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"69  | model.encoder.block.3.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"70  | model.encoder.block.3.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"71  | 
model.encoder.block.3.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"72  | model.encoder.block.3.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"73  | model.encoder.block.4                                                 | T5Block                    | 7 M   \\n\",\n            \"74  | model.encoder.block.4.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"75  | model.encoder.block.4.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"76  | model.encoder.block.4.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"77  | model.encoder.block.4.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"78  | model.encoder.block.4.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"79  | model.encoder.block.4.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"80  | model.encoder.block.4.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"81  | model.encoder.block.4.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"82  | model.encoder.block.4.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"83  | model.encoder.block.4.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"84  | model.encoder.block.4.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"85  | model.encoder.block.4.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   
\\n\",\n            \"86  | model.encoder.block.4.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"87  | model.encoder.block.4.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"88  | model.encoder.block.4.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"89  | model.encoder.block.4.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"90  | model.encoder.block.5                                                 | T5Block                    | 7 M   \\n\",\n            \"91  | model.encoder.block.5.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"92  | model.encoder.block.5.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"93  | model.encoder.block.5.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"94  | model.encoder.block.5.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"95  | model.encoder.block.5.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"96  | model.encoder.block.5.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"97  | model.encoder.block.5.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"98  | model.encoder.block.5.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"99  | model.encoder.block.5.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"100 | model.encoder.block.5.layer.1                                         | T5LayerFF 
                 | 4 M   \\n\",\n            \"101 | model.encoder.block.5.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"102 | model.encoder.block.5.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"103 | model.encoder.block.5.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"104 | model.encoder.block.5.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"105 | model.encoder.block.5.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"106 | model.encoder.block.5.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"107 | model.encoder.block.6                                                 | T5Block                    | 7 M   \\n\",\n            \"108 | model.encoder.block.6.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"109 | model.encoder.block.6.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"110 | model.encoder.block.6.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"111 | model.encoder.block.6.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"112 | model.encoder.block.6.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"113 | model.encoder.block.6.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"114 | model.encoder.block.6.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"115 | model.encoder.block.6.layer.0.layer_norm                 
             | T5LayerNorm                | 768   \\n\",\n            \"116 | model.encoder.block.6.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"117 | model.encoder.block.6.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"118 | model.encoder.block.6.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"119 | model.encoder.block.6.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"120 | model.encoder.block.6.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"121 | model.encoder.block.6.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"122 | model.encoder.block.6.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"123 | model.encoder.block.6.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"124 | model.encoder.block.7                                                 | T5Block                    | 7 M   \\n\",\n            \"125 | model.encoder.block.7.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"126 | model.encoder.block.7.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"127 | model.encoder.block.7.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"128 | model.encoder.block.7.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"129 | model.encoder.block.7.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"130 | 
model.encoder.block.7.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"131 | model.encoder.block.7.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"132 | model.encoder.block.7.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"133 | model.encoder.block.7.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"134 | model.encoder.block.7.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"135 | model.encoder.block.7.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"136 | model.encoder.block.7.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"137 | model.encoder.block.7.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"138 | model.encoder.block.7.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"139 | model.encoder.block.7.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"140 | model.encoder.block.7.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"141 | model.encoder.block.8                                                 | T5Block                    | 7 M   \\n\",\n            \"142 | model.encoder.block.8.layer                                           | ModuleList                 | 7 M   \\n\",\n            \"143 | model.encoder.block.8.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"144 | model.encoder.block.8.layer.0.SelfAttention                           | T5Attention                | 2 M   
\\n\",\n            \"145 | model.encoder.block.8.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"146 | model.encoder.block.8.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"147 | model.encoder.block.8.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"148 | model.encoder.block.8.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"149 | model.encoder.block.8.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"150 | model.encoder.block.8.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"151 | model.encoder.block.8.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"152 | model.encoder.block.8.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"153 | model.encoder.block.8.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"154 | model.encoder.block.8.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"155 | model.encoder.block.8.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"156 | model.encoder.block.8.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"157 | model.encoder.block.8.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"158 | model.encoder.block.9                                                 | T5Block                    | 7 M   \\n\",\n            \"159 | model.encoder.block.9.layer                                           | 
ModuleList                 | 7 M   \\n\",\n            \"160 | model.encoder.block.9.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"161 | model.encoder.block.9.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"162 | model.encoder.block.9.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"163 | model.encoder.block.9.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"164 | model.encoder.block.9.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"165 | model.encoder.block.9.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"166 | model.encoder.block.9.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"167 | model.encoder.block.9.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"168 | model.encoder.block.9.layer.1                                         | T5LayerFF                  | 4 M   \\n\",\n            \"169 | model.encoder.block.9.layer.1.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"170 | model.encoder.block.9.layer.1.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"171 | model.encoder.block.9.layer.1.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"172 | model.encoder.block.9.layer.1.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"173 | model.encoder.block.9.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"174 | model.encoder.block.9.layer.1.dropout          
                       | Dropout                    | 0     \\n\",\n            \"175 | model.encoder.block.10                                                | T5Block                    | 7 M   \\n\",\n            \"176 | model.encoder.block.10.layer                                          | ModuleList                 | 7 M   \\n\",\n            \"177 | model.encoder.block.10.layer.0                                        | T5LayerSelfAttention       | 2 M   \\n\",\n            \"178 | model.encoder.block.10.layer.0.SelfAttention                          | T5Attention                | 2 M   \\n\",\n            \"179 | model.encoder.block.10.layer.0.SelfAttention.q                        | Linear                     | 589 K \\n\",\n            \"180 | model.encoder.block.10.layer.0.SelfAttention.k                        | Linear                     | 589 K \\n\",\n            \"181 | model.encoder.block.10.layer.0.SelfAttention.v                        | Linear                     | 589 K \\n\",\n            \"182 | model.encoder.block.10.layer.0.SelfAttention.o                        | Linear                     | 589 K \\n\",\n            \"183 | model.encoder.block.10.layer.0.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"184 | model.encoder.block.10.layer.0.dropout                                | Dropout                    | 0     \\n\",\n            \"185 | model.encoder.block.10.layer.1                                        | T5LayerFF                  | 4 M   \\n\",\n            \"186 | model.encoder.block.10.layer.1.DenseReluDense                         | T5DenseReluDense           | 4 M   \\n\",\n            \"187 | model.encoder.block.10.layer.1.DenseReluDense.wi                      | Linear                     | 2 M   \\n\",\n            \"188 | model.encoder.block.10.layer.1.DenseReluDense.wo                      | Linear                     | 2 M   \\n\",\n            \"189 | 
model.encoder.block.10.layer.1.DenseReluDense.dropout                 | Dropout                    | 0     \\n\",\n            \"190 | model.encoder.block.10.layer.1.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"191 | model.encoder.block.10.layer.1.dropout                                | Dropout                    | 0     \\n\",\n            \"192 | model.encoder.block.11                                                | T5Block                    | 7 M   \\n\",\n            \"193 | model.encoder.block.11.layer                                          | ModuleList                 | 7 M   \\n\",\n            \"194 | model.encoder.block.11.layer.0                                        | T5LayerSelfAttention       | 2 M   \\n\",\n            \"195 | model.encoder.block.11.layer.0.SelfAttention                          | T5Attention                | 2 M   \\n\",\n            \"196 | model.encoder.block.11.layer.0.SelfAttention.q                        | Linear                     | 589 K \\n\",\n            \"197 | model.encoder.block.11.layer.0.SelfAttention.k                        | Linear                     | 589 K \\n\",\n            \"198 | model.encoder.block.11.layer.0.SelfAttention.v                        | Linear                     | 589 K \\n\",\n            \"199 | model.encoder.block.11.layer.0.SelfAttention.o                        | Linear                     | 589 K \\n\",\n            \"200 | model.encoder.block.11.layer.0.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"201 | model.encoder.block.11.layer.0.dropout                                | Dropout                    | 0     \\n\",\n            \"202 | model.encoder.block.11.layer.1                                        | T5LayerFF                  | 4 M   \\n\",\n            \"203 | model.encoder.block.11.layer.1.DenseReluDense                         | T5DenseReluDense           | 4 M   
\\n\",\n            \"204 | model.encoder.block.11.layer.1.DenseReluDense.wi                      | Linear                     | 2 M   \\n\",\n            \"205 | model.encoder.block.11.layer.1.DenseReluDense.wo                      | Linear                     | 2 M   \\n\",\n            \"206 | model.encoder.block.11.layer.1.DenseReluDense.dropout                 | Dropout                    | 0     \\n\",\n            \"207 | model.encoder.block.11.layer.1.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"208 | model.encoder.block.11.layer.1.dropout                                | Dropout                    | 0     \\n\",\n            \"209 | model.encoder.final_layer_norm                                        | T5LayerNorm                | 768   \\n\",\n            \"210 | model.encoder.dropout                                                 | Dropout                    | 0     \\n\",\n            \"211 | model.decoder                                                         | T5Stack                    | 137 M \\n\",\n            \"212 | model.decoder.block                                                   | ModuleList                 | 113 M \\n\",\n            \"213 | model.decoder.block.0                                                 | T5Block                    | 9 M   \\n\",\n            \"214 | model.decoder.block.0.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"215 | model.decoder.block.0.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"216 | model.decoder.block.0.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"217 | model.decoder.block.0.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"218 | model.decoder.block.0.layer.0.SelfAttention.k                         | Linear    
                 | 589 K \\n\",\n            \"219 | model.decoder.block.0.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"220 | model.decoder.block.0.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"221 | model.decoder.block.0.layer.0.SelfAttention.relative_attention_bias   | Embedding                  | 384   \\n\",\n            \"222 | model.decoder.block.0.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"223 | model.decoder.block.0.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"224 | model.decoder.block.0.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"225 | model.decoder.block.0.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"226 | model.decoder.block.0.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"227 | model.decoder.block.0.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"228 | model.decoder.block.0.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"229 | model.decoder.block.0.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"230 | model.decoder.block.0.layer.1.EncDecAttention.relative_attention_bias | Embedding                  | 384   \\n\",\n            \"231 | model.decoder.block.0.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"232 | model.decoder.block.0.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"233 | model.decoder.block.0.layer.2                            
             | T5LayerFF                  | 4 M   \\n\",\n            \"234 | model.decoder.block.0.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"235 | model.decoder.block.0.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"236 | model.decoder.block.0.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"237 | model.decoder.block.0.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"238 | model.decoder.block.0.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"239 | model.decoder.block.0.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"240 | model.decoder.block.1                                                 | T5Block                    | 9 M   \\n\",\n            \"241 | model.decoder.block.1.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"242 | model.decoder.block.1.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"243 | model.decoder.block.1.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"244 | model.decoder.block.1.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"245 | model.decoder.block.1.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"246 | model.decoder.block.1.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"247 | model.decoder.block.1.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"248 | 
model.decoder.block.1.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"249 | model.decoder.block.1.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"250 | model.decoder.block.1.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"251 | model.decoder.block.1.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"252 | model.decoder.block.1.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"253 | model.decoder.block.1.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"254 | model.decoder.block.1.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"255 | model.decoder.block.1.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"256 | model.decoder.block.1.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"257 | model.decoder.block.1.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"258 | model.decoder.block.1.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"259 | model.decoder.block.1.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"260 | model.decoder.block.1.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"261 | model.decoder.block.1.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"262 | model.decoder.block.1.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     
\\n\",\n            \"263 | model.decoder.block.1.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"264 | model.decoder.block.1.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"265 | model.decoder.block.2                                                 | T5Block                    | 9 M   \\n\",\n            \"266 | model.decoder.block.2.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"267 | model.decoder.block.2.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"268 | model.decoder.block.2.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"269 | model.decoder.block.2.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"270 | model.decoder.block.2.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"271 | model.decoder.block.2.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"272 | model.decoder.block.2.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"273 | model.decoder.block.2.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"274 | model.decoder.block.2.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"275 | model.decoder.block.2.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"276 | model.decoder.block.2.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"277 | model.decoder.block.2.layer.1.EncDecAttention.q                       | Linear    
                 | 589 K \\n\",\n            \"278 | model.decoder.block.2.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"279 | model.decoder.block.2.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"280 | model.decoder.block.2.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"281 | model.decoder.block.2.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"282 | model.decoder.block.2.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"283 | model.decoder.block.2.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"284 | model.decoder.block.2.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"285 | model.decoder.block.2.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"286 | model.decoder.block.2.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"287 | model.decoder.block.2.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"288 | model.decoder.block.2.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"289 | model.decoder.block.2.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"290 | model.decoder.block.3                                                 | T5Block                    | 9 M   \\n\",\n            \"291 | model.decoder.block.3.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"292 | model.decoder.block.3.layer.0                            
             | T5LayerSelfAttention       | 2 M   \\n\",\n            \"293 | model.decoder.block.3.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"294 | model.decoder.block.3.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"295 | model.decoder.block.3.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"296 | model.decoder.block.3.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"297 | model.decoder.block.3.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"298 | model.decoder.block.3.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"299 | model.decoder.block.3.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"300 | model.decoder.block.3.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"301 | model.decoder.block.3.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"302 | model.decoder.block.3.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"303 | model.decoder.block.3.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"304 | model.decoder.block.3.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"305 | model.decoder.block.3.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"306 | model.decoder.block.3.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"307 | 
model.decoder.block.3.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"308 | model.decoder.block.3.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"309 | model.decoder.block.3.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"310 | model.decoder.block.3.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"311 | model.decoder.block.3.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"312 | model.decoder.block.3.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"313 | model.decoder.block.3.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"314 | model.decoder.block.3.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"315 | model.decoder.block.4                                                 | T5Block                    | 9 M   \\n\",\n            \"316 | model.decoder.block.4.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"317 | model.decoder.block.4.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"318 | model.decoder.block.4.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"319 | model.decoder.block.4.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"320 | model.decoder.block.4.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"321 | model.decoder.block.4.layer.0.SelfAttention.v                         | Linear                     | 589 K 
\\n\",\n            \"322 | model.decoder.block.4.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"323 | model.decoder.block.4.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"324 | model.decoder.block.4.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"325 | model.decoder.block.4.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"326 | model.decoder.block.4.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"327 | model.decoder.block.4.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"328 | model.decoder.block.4.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"329 | model.decoder.block.4.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"330 | model.decoder.block.4.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"331 | model.decoder.block.4.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"332 | model.decoder.block.4.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"333 | model.decoder.block.4.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"334 | model.decoder.block.4.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"335 | model.decoder.block.4.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"336 | model.decoder.block.4.layer.2.DenseReluDense.wo                       | Linear    
                 | 2 M   \\n\",\n            \"337 | model.decoder.block.4.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"338 | model.decoder.block.4.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"339 | model.decoder.block.4.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"340 | model.decoder.block.5                                                 | T5Block                    | 9 M   \\n\",\n            \"341 | model.decoder.block.5.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"342 | model.decoder.block.5.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"343 | model.decoder.block.5.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"344 | model.decoder.block.5.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"345 | model.decoder.block.5.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"346 | model.decoder.block.5.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"347 | model.decoder.block.5.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"348 | model.decoder.block.5.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"349 | model.decoder.block.5.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"350 | model.decoder.block.5.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"351 | model.decoder.block.5.layer.1.EncDecAttention            
             | T5Attention                | 2 M   \\n\",\n            \"352 | model.decoder.block.5.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"353 | model.decoder.block.5.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"354 | model.decoder.block.5.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"355 | model.decoder.block.5.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"356 | model.decoder.block.5.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"357 | model.decoder.block.5.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"358 | model.decoder.block.5.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"359 | model.decoder.block.5.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"360 | model.decoder.block.5.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"361 | model.decoder.block.5.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"362 | model.decoder.block.5.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"363 | model.decoder.block.5.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"364 | model.decoder.block.5.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"365 | model.decoder.block.6                                                 | T5Block                    | 9 M   \\n\",\n            \"366 | model.decoder.block.6.layer     
                                      | ModuleList                 | 9 M   \\n\",\n            \"367 | model.decoder.block.6.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"368 | model.decoder.block.6.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"369 | model.decoder.block.6.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"370 | model.decoder.block.6.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"371 | model.decoder.block.6.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"372 | model.decoder.block.6.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"373 | model.decoder.block.6.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"374 | model.decoder.block.6.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"375 | model.decoder.block.6.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"376 | model.decoder.block.6.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"377 | model.decoder.block.6.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"378 | model.decoder.block.6.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"379 | model.decoder.block.6.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"380 | model.decoder.block.6.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"381 | 
model.decoder.block.6.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"382 | model.decoder.block.6.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"383 | model.decoder.block.6.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"384 | model.decoder.block.6.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"385 | model.decoder.block.6.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"386 | model.decoder.block.6.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"387 | model.decoder.block.6.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"388 | model.decoder.block.6.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"389 | model.decoder.block.6.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"390 | model.decoder.block.7                                                 | T5Block                    | 9 M   \\n\",\n            \"391 | model.decoder.block.7.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"392 | model.decoder.block.7.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"393 | model.decoder.block.7.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"394 | model.decoder.block.7.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"395 | model.decoder.block.7.layer.0.SelfAttention.k                         | Linear                     | 589 K 
\\n\",\n            \"396 | model.decoder.block.7.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"397 | model.decoder.block.7.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"398 | model.decoder.block.7.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"399 | model.decoder.block.7.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"400 | model.decoder.block.7.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"401 | model.decoder.block.7.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"402 | model.decoder.block.7.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"403 | model.decoder.block.7.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"404 | model.decoder.block.7.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"405 | model.decoder.block.7.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"406 | model.decoder.block.7.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"407 | model.decoder.block.7.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"408 | model.decoder.block.7.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"409 | model.decoder.block.7.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"410 | model.decoder.block.7.layer.2.DenseReluDense.wi                       | Linear    
                 | 2 M   \\n\",\n            \"411 | model.decoder.block.7.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"412 | model.decoder.block.7.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"413 | model.decoder.block.7.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"414 | model.decoder.block.7.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"415 | model.decoder.block.8                                                 | T5Block                    | 9 M   \\n\",\n            \"416 | model.decoder.block.8.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"417 | model.decoder.block.8.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"418 | model.decoder.block.8.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"419 | model.decoder.block.8.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"420 | model.decoder.block.8.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"421 | model.decoder.block.8.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"422 | model.decoder.block.8.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"423 | model.decoder.block.8.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"424 | model.decoder.block.8.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"425 | model.decoder.block.8.layer.1                            
             | T5LayerCrossAttention      | 2 M   \\n\",\n            \"426 | model.decoder.block.8.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"427 | model.decoder.block.8.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"428 | model.decoder.block.8.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"429 | model.decoder.block.8.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"430 | model.decoder.block.8.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"431 | model.decoder.block.8.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"432 | model.decoder.block.8.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"433 | model.decoder.block.8.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"434 | model.decoder.block.8.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"435 | model.decoder.block.8.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"436 | model.decoder.block.8.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"437 | model.decoder.block.8.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"438 | model.decoder.block.8.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"439 | model.decoder.block.8.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"440 | model.decoder.block.9           
                                      | T5Block                    | 9 M   \\n\",\n            \"441 | model.decoder.block.9.layer                                           | ModuleList                 | 9 M   \\n\",\n            \"442 | model.decoder.block.9.layer.0                                         | T5LayerSelfAttention       | 2 M   \\n\",\n            \"443 | model.decoder.block.9.layer.0.SelfAttention                           | T5Attention                | 2 M   \\n\",\n            \"444 | model.decoder.block.9.layer.0.SelfAttention.q                         | Linear                     | 589 K \\n\",\n            \"445 | model.decoder.block.9.layer.0.SelfAttention.k                         | Linear                     | 589 K \\n\",\n            \"446 | model.decoder.block.9.layer.0.SelfAttention.v                         | Linear                     | 589 K \\n\",\n            \"447 | model.decoder.block.9.layer.0.SelfAttention.o                         | Linear                     | 589 K \\n\",\n            \"448 | model.decoder.block.9.layer.0.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"449 | model.decoder.block.9.layer.0.dropout                                 | Dropout                    | 0     \\n\",\n            \"450 | model.decoder.block.9.layer.1                                         | T5LayerCrossAttention      | 2 M   \\n\",\n            \"451 | model.decoder.block.9.layer.1.EncDecAttention                         | T5Attention                | 2 M   \\n\",\n            \"452 | model.decoder.block.9.layer.1.EncDecAttention.q                       | Linear                     | 589 K \\n\",\n            \"453 | model.decoder.block.9.layer.1.EncDecAttention.k                       | Linear                     | 589 K \\n\",\n            \"454 | model.decoder.block.9.layer.1.EncDecAttention.v                       | Linear                     | 589 K \\n\",\n            \"455 | 
model.decoder.block.9.layer.1.EncDecAttention.o                       | Linear                     | 589 K \\n\",\n            \"456 | model.decoder.block.9.layer.1.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"457 | model.decoder.block.9.layer.1.dropout                                 | Dropout                    | 0     \\n\",\n            \"458 | model.decoder.block.9.layer.2                                         | T5LayerFF                  | 4 M   \\n\",\n            \"459 | model.decoder.block.9.layer.2.DenseReluDense                          | T5DenseReluDense           | 4 M   \\n\",\n            \"460 | model.decoder.block.9.layer.2.DenseReluDense.wi                       | Linear                     | 2 M   \\n\",\n            \"461 | model.decoder.block.9.layer.2.DenseReluDense.wo                       | Linear                     | 2 M   \\n\",\n            \"462 | model.decoder.block.9.layer.2.DenseReluDense.dropout                  | Dropout                    | 0     \\n\",\n            \"463 | model.decoder.block.9.layer.2.layer_norm                              | T5LayerNorm                | 768   \\n\",\n            \"464 | model.decoder.block.9.layer.2.dropout                                 | Dropout                    | 0     \\n\",\n            \"465 | model.decoder.block.10                                                | T5Block                    | 9 M   \\n\",\n            \"466 | model.decoder.block.10.layer                                          | ModuleList                 | 9 M   \\n\",\n            \"467 | model.decoder.block.10.layer.0                                        | T5LayerSelfAttention       | 2 M   \\n\",\n            \"468 | model.decoder.block.10.layer.0.SelfAttention                          | T5Attention                | 2 M   \\n\",\n            \"469 | model.decoder.block.10.layer.0.SelfAttention.q                        | Linear                     | 589 K 
\\n\",\n            \"470 | model.decoder.block.10.layer.0.SelfAttention.k                        | Linear                     | 589 K \\n\",\n            \"471 | model.decoder.block.10.layer.0.SelfAttention.v                        | Linear                     | 589 K \\n\",\n            \"472 | model.decoder.block.10.layer.0.SelfAttention.o                        | Linear                     | 589 K \\n\",\n            \"473 | model.decoder.block.10.layer.0.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"474 | model.decoder.block.10.layer.0.dropout                                | Dropout                    | 0     \\n\",\n            \"475 | model.decoder.block.10.layer.1                                        | T5LayerCrossAttention      | 2 M   \\n\",\n            \"476 | model.decoder.block.10.layer.1.EncDecAttention                        | T5Attention                | 2 M   \\n\",\n            \"477 | model.decoder.block.10.layer.1.EncDecAttention.q                      | Linear                     | 589 K \\n\",\n            \"478 | model.decoder.block.10.layer.1.EncDecAttention.k                      | Linear                     | 589 K \\n\",\n            \"479 | model.decoder.block.10.layer.1.EncDecAttention.v                      | Linear                     | 589 K \\n\",\n            \"480 | model.decoder.block.10.layer.1.EncDecAttention.o                      | Linear                     | 589 K \\n\",\n            \"481 | model.decoder.block.10.layer.1.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"482 | model.decoder.block.10.layer.1.dropout                                | Dropout                    | 0     \\n\",\n            \"483 | model.decoder.block.10.layer.2                                        | T5LayerFF                  | 4 M   \\n\",\n            \"484 | model.decoder.block.10.layer.2.DenseReluDense                         | 
T5DenseReluDense           | 4 M   \\n\",\n            \"485 | model.decoder.block.10.layer.2.DenseReluDense.wi                      | Linear                     | 2 M   \\n\",\n            \"486 | model.decoder.block.10.layer.2.DenseReluDense.wo                      | Linear                     | 2 M   \\n\",\n            \"487 | model.decoder.block.10.layer.2.DenseReluDense.dropout                 | Dropout                    | 0     \\n\",\n            \"488 | model.decoder.block.10.layer.2.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"489 | model.decoder.block.10.layer.2.dropout                                | Dropout                    | 0     \\n\",\n            \"490 | model.decoder.block.11                                                | T5Block                    | 9 M   \\n\",\n            \"491 | model.decoder.block.11.layer                                          | ModuleList                 | 9 M   \\n\",\n            \"492 | model.decoder.block.11.layer.0                                        | T5LayerSelfAttention       | 2 M   \\n\",\n            \"493 | model.decoder.block.11.layer.0.SelfAttention                          | T5Attention                | 2 M   \\n\",\n            \"494 | model.decoder.block.11.layer.0.SelfAttention.q                        | Linear                     | 589 K \\n\",\n            \"495 | model.decoder.block.11.layer.0.SelfAttention.k                        | Linear                     | 589 K \\n\",\n            \"496 | model.decoder.block.11.layer.0.SelfAttention.v                        | Linear                     | 589 K \\n\",\n            \"497 | model.decoder.block.11.layer.0.SelfAttention.o                        | Linear                     | 589 K \\n\",\n            \"498 | model.decoder.block.11.layer.0.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"499 | model.decoder.block.11.layer.0.dropout         
                       | Dropout                    | 0     \\n\",\n            \"500 | model.decoder.block.11.layer.1                                        | T5LayerCrossAttention      | 2 M   \\n\",\n            \"501 | model.decoder.block.11.layer.1.EncDecAttention                        | T5Attention                | 2 M   \\n\",\n            \"502 | model.decoder.block.11.layer.1.EncDecAttention.q                      | Linear                     | 589 K \\n\",\n            \"503 | model.decoder.block.11.layer.1.EncDecAttention.k                      | Linear                     | 589 K \\n\",\n            \"504 | model.decoder.block.11.layer.1.EncDecAttention.v                      | Linear                     | 589 K \\n\",\n            \"505 | model.decoder.block.11.layer.1.EncDecAttention.o                      | Linear                     | 589 K \\n\",\n            \"506 | model.decoder.block.11.layer.1.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"507 | model.decoder.block.11.layer.1.dropout                                | Dropout                    | 0     \\n\",\n            \"508 | model.decoder.block.11.layer.2                                        | T5LayerFF                  | 4 M   \\n\",\n            \"509 | model.decoder.block.11.layer.2.DenseReluDense                         | T5DenseReluDense           | 4 M   \\n\",\n            \"510 | model.decoder.block.11.layer.2.DenseReluDense.wi                      | Linear                     | 2 M   \\n\",\n            \"511 | model.decoder.block.11.layer.2.DenseReluDense.wo                      | Linear                     | 2 M   \\n\",\n            \"512 | model.decoder.block.11.layer.2.DenseReluDense.dropout                 | Dropout                    | 0     \\n\",\n            \"513 | model.decoder.block.11.layer.2.layer_norm                             | T5LayerNorm                | 768   \\n\",\n            \"514 | 
model.decoder.block.11.layer.2.dropout                                | Dropout                    | 0     \\n\",\n            \"515 | model.decoder.final_layer_norm                                        | T5LayerNorm                | 768   \\n\",\n            \"516 | model.decoder.dropout                                                 | Dropout                    | 0     \\n\",\n            \"517 | model.lm_head                                                         | Linear                     | 24 M  \\n\",\n            \"INFO:__main__:LOOKING AT swag_data dev\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"8e79d03deee94b299431330441bd64c8\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:__main__:LOOKING AT swag_data train\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\r\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:__main__:LOOKING AT swag_data dev\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"68705cee3df5458fb5145046337d925c\",\n              \"version_minor\": 0,\n              
\"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"/pytorch/torch/csrc/utils/python_arg_parser.cpp:756: UserWarning: This overload of add_ is deprecated:\\n\",\n            \"\\tadd_(Number alpha, Tensor other)\\n\",\n            \"Consider using one of the following signatures instead:\\n\",\n            \"\\tadd_(Tensor other, *, Number alpha)\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"69f6eb1cb0434128961b5d83529813c5\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:__main__:***** Validation results *****\\n\",\n            \"INFO:__main__:avg_val_loss = tensor(0.3535, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:loss = tensor(0.3080, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:train_loss = tensor(0.3080, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:val_loss = tensor(0.3535, device='cuda:0')\\n\",\n            \"\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": 
{\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"0cfc8fa73f164b4fa5ddcbc3f115ef9b\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:__main__:***** Validation results *****\\n\",\n            \"INFO:__main__:avg_train_loss = tensor(0.5107, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:avg_val_loss = tensor(0.3268, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:epoch = 0\\n\",\n            \"\\n\",\n            \"INFO:__main__:loss = tensor(0.5484, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:train_loss = tensor(0.5484, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:__main__:val_loss = tensor(0.3268, device='cuda:0')\\n\",\n            \"\\n\",\n            \"INFO:lightning:Detected KeyboardInterrupt, attempting graceful shutdown...\\n\"\n          ],\n          \"name\": \"stderr\"\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"1\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 22\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"o1ZB_6SK7V-3\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        
\"\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"AgNV3TMzqSvj\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"### Eval\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"gFFOwfXyqc4_\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"import textwrap\\n\",\n        \"from tqdm.auto import tqdm\\n\",\n        \"from sklearn import metrics\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"id\": \"rsYCq3Lwqc5Y\",\n        \"outputId\": \"51f7bd88-2441-42be-e8f3-adc0337a164c\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"source\": [\n        \"dataset =  SwagDataset(tokenizer, data_dir='swag_data', type_path='val')\\n\",\n        \"loader = DataLoader(dataset, batch_size=32, num_workers=4)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"INFO:__main__:LOOKING AT swag_data dev\\n\"\n          ],\n          \"name\": \"stderr\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"colab_type\": \"code\",\n        \"outputId\": \"81e7d67d-1d15-4dea-a552-695cfe8ef105\",\n        \"id\": \"KHwMBQNjqc5h\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 66,\n          \"referenced_widgets\": [\n            \"1597779d89464892885045be715890a8\",\n            \"8a42468ed6b945e8bfce1803f3ea4452\",\n            \"f87eae824cf1492b9555b78648a9f261\",\n            \"6cd0d574b5fd43588b8d492674125218\",\n            
\"17b25142ac744ba882e2bbd1f42c1db2\",\n            \"09185d325ef84c1fad7b07fbd9eeed31\",\n            \"ba31765789dc46229493674dab21921d\",\n            \"a9dd88fb73374e108482b80993b998eb\"\n          ]\n        }\n      },\n      \"source\": [\n        \"model.model.eval()\\n\",\n        \"outputs = []\\n\",\n        \"targets = []\\n\",\n        \"for batch in tqdm(loader):\\n\",\n        \"  outs = model.model.generate(input_ids=batch['source_ids'].cuda(), \\n\",\n        \"                              attention_mask=batch['source_mask'].cuda(), \\n\",\n        \"                              max_length=2)\\n\",\n        \"\\n\",\n        \"  dec = [tokenizer.decode(ids) for ids in outs]\\n\",\n        \"  target = [tokenizer.decode(ids) for ids in batch[\\\"target_ids\\\"]]\\n\",\n        \"  \\n\",\n        \"  outputs.extend(dec)\\n\",\n        \"  targets.extend(target)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"1597779d89464892885045be715890a8\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, max=626.0), HTML(value='')))\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"ZbTValmYq15r\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"for i, out in enumerate(outputs):\\n\",\n        \"  if out not in \\\"1234\\\":\\n\",\n        \"    print(i, 'detected invalid prediction')\"\n      
],\n      \"execution_count\": 0,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"jN35n2pas-pF\",\n        \"colab_type\": \"code\",\n        \"outputId\": \"be8a3507-8e66-479d-c41c-dd9cb0603742\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 34\n        }\n      },\n      \"source\": [\n        \"metrics.accuracy_score(targets, outputs)\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": [\n        {\n          \"output_type\": \"execute_result\",\n          \"data\": {\n            \"text/plain\": [\n              \"0.7397280815755274\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          },\n          \"execution_count\": 28\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"t_WaMutznvGb\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"This is great! We have achieved almost 74% accuracy with this simple formulation. This is great because with BERT-like models, to make a prediction on a single example the model needs to do 4 forward passes, one for each possible ending, and then the logits are concatenated together for all 4 passes and then passed through a final softmax layer to produce 4 probabilities. This approach needs only a single pass for one example.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"rFgOHlW_tHPd\",\n        \"colab_type\": \"code\",\n        \"colab\": {}\n      },\n      \"source\": [\n        \"\"\n      ],\n      \"execution_count\": 0,\n      \"outputs\": []\n    }\n  ]\n}"
  }
]