[
  {
    "path": ".idea/misc.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n  <component name=\"ProjectRootManager\" version=\"2\" project-jdk-name=\"Python 3.6.3 (~/anaconda3/bin/python)\" project-jdk-type=\"Python SDK\" />\n</project>"
  },
  {
    "path": ".idea/modules.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n  <component name=\"ProjectModuleManager\">\n    <modules>\n      <module fileurl=\"file://$PROJECT_DIR$/.idea/textualEntailenent.iml\" filepath=\"$PROJECT_DIR$/.idea/textualEntailenent.iml\" />\n    </modules>\n  </component>\n</project>"
  },
  {
    "path": ".idea/textualEntailenent.iml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<module type=\"PYTHON_MODULE\" version=\"4\">\n  <component name=\"NewModuleRootManager\">\n    <content url=\"file://$MODULE_DIR$\" />\n    <orderEntry type=\"inheritedJdk\" />\n    <orderEntry type=\"sourceFolder\" forTests=\"false\" />\n  </component>\n  <component name=\"TestRunnerService\">\n    <option name=\"projectConfiguration\" value=\"Nosetests\" />\n    <option name=\"PROJECT_TEST_RUNNER\" value=\"Nosetests\" />\n  </component>\n</module>"
  },
  {
    "path": ".idea/vcs.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n  <component name=\"VcsDirectoryMappings\">\n    <mapping directory=\"$PROJECT_DIR$\" vcs=\"Git\" />\n  </component>\n</project>"
  },
  {
    "path": ".idea/workspace.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n  <component name=\"ChangeListManager\">\n    <list default=\"true\" id=\"e151fde8-248a-458d-a5f7-531c8b11572b\" name=\"Default\" comment=\"\">\n      <change type=\"NEW\" beforePath=\"\" afterPath=\"$PROJECT_DIR$/image/project_route.png\" />\n      <change type=\"NEW\" beforePath=\"\" afterPath=\"$PROJECT_DIR$/.idea/vcs.xml\" />\n      <change type=\"MODIFICATION\" beforePath=\"$PROJECT_DIR$/README.md\" afterPath=\"$PROJECT_DIR$/README.md\" />\n    </list>\n    <option name=\"EXCLUDED_CONVERTED_TO_IGNORED\" value=\"true\" />\n    <option name=\"TRACKING_ENABLED\" value=\"true\" />\n    <option name=\"SHOW_DIALOG\" value=\"false\" />\n    <option name=\"HIGHLIGHT_CONFLICTS\" value=\"true\" />\n    <option name=\"HIGHLIGHT_NON_ACTIVE_CHANGELIST\" value=\"false\" />\n    <option name=\"LAST_RESOLUTION\" value=\"IGNORE\" />\n  </component>\n  <component name=\"FileEditorManager\">\n    <leaf>\n      <file leaf-file-name=\"lstm_train.py\" pinned=\"false\" current-in-tab=\"false\">\n        <entry file=\"file://$PROJECT_DIR$/lstm_train.py\">\n          <provider selected=\"true\" editor-type-id=\"text-editor\">\n            <state relative-caret-position=\"136\">\n              <caret line=\"204\" column=\"31\" lean-forward=\"false\" selection-start-line=\"204\" selection-start-column=\"31\" selection-end-line=\"204\" selection-end-column=\"31\" />\n              <folding />\n            </state>\n          </provider>\n        </entry>\n      </file>\n      <file leaf-file-name=\"README.md\" pinned=\"false\" current-in-tab=\"true\">\n        <entry file=\"file://$PROJECT_DIR$/README.md\">\n          <provider selected=\"true\" editor-type-id=\"text-editor\">\n            <state relative-caret-position=\"243\">\n              <caret line=\"144\" column=\"0\" lean-forward=\"false\" selection-start-line=\"144\" selection-start-column=\"0\" selection-end-line=\"144\" 
selection-end-column=\"0\" />\n              <folding />\n            </state>\n          </provider>\n        </entry>\n      </file>\n      <file leaf-file-name=\"train.txt\" pinned=\"false\" current-in-tab=\"false\">\n        <entry file=\"file://$PROJECT_DIR$/data/train.txt\">\n          <provider selected=\"true\" editor-type-id=\"LargeFileEditor\">\n            <state relative-caret-position=\"360\">\n              <caret line=\"548\" column=\"38\" lean-forward=\"false\" selection-start-line=\"534\" selection-start-column=\"0\" selection-end-line=\"548\" selection-end-column=\"38\" />\n            </state>\n          </provider>\n        </entry>\n      </file>\n      <file leaf-file-name=\"translate_duba.py\" pinned=\"false\" current-in-tab=\"false\">\n        <entry file=\"file://$PROJECT_DIR$/translate_duba.py\">\n          <provider selected=\"true\" editor-type-id=\"text-editor\">\n            <state relative-caret-position=\"162\">\n              <caret line=\"10\" column=\"19\" lean-forward=\"false\" selection-start-line=\"4\" selection-start-column=\"0\" selection-end-line=\"34\" selection-end-column=\"21\" />\n              <folding />\n            </state>\n          </provider>\n        </entry>\n      </file>\n    </leaf>\n  </component>\n  <component name=\"Git.Settings\">\n    <option name=\"RECENT_GIT_ROOT_PATH\" value=\"$PROJECT_DIR$\" />\n  </component>\n  <component name=\"IdeDocumentHistory\">\n    <option name=\"CHANGED_PATHS\">\n      <list>\n        <option value=\"$PROJECT_DIR$/README.md\" />\n      </list>\n    </option>\n  </component>\n  <component name=\"ProjectFrameBounds\">\n    <option name=\"x\" value=\"129\" />\n    <option name=\"y\" value=\"490\" />\n    <option name=\"width\" value=\"1366\" />\n    <option name=\"height\" value=\"701\" />\n  </component>\n  <component name=\"ProjectView\">\n    <navigator currentView=\"ProjectPane\" proportions=\"\" version=\"1\">\n      <flattenPackages />\n      <showMembers />\n      
<showModules />\n      <showLibraryContents />\n      <hideEmptyPackages />\n      <abbreviatePackageNames />\n      <autoscrollToSource />\n      <autoscrollFromSource />\n      <sortByType />\n      <manualOrder />\n      <foldersAlwaysOnTop value=\"true\" />\n    </navigator>\n    <panes>\n      <pane id=\"ProjectPane\">\n        <subPane>\n          <expand>\n            <path>\n              <item name=\"textualEntailenent\" type=\"b2602c69:ProjectViewProjectNode\" />\n              <item name=\"textualEntailenent\" type=\"462c0819:PsiDirectoryNode\" />\n            </path>\n            <path>\n              <item name=\"textualEntailenent\" type=\"b2602c69:ProjectViewProjectNode\" />\n              <item name=\"textualEntailenent\" type=\"462c0819:PsiDirectoryNode\" />\n              <item name=\"data\" type=\"462c0819:PsiDirectoryNode\" />\n            </path>\n            <path>\n              <item name=\"textualEntailenent\" type=\"b2602c69:ProjectViewProjectNode\" />\n              <item name=\"textualEntailenent\" type=\"462c0819:PsiDirectoryNode\" />\n              <item name=\"DataTranslate\" type=\"462c0819:PsiDirectoryNode\" />\n            </path>\n            <path>\n              <item name=\"textualEntailenent\" type=\"b2602c69:ProjectViewProjectNode\" />\n              <item name=\"textualEntailenent\" type=\"462c0819:PsiDirectoryNode\" />\n              <item name=\"image\" type=\"462c0819:PsiDirectoryNode\" />\n            </path>\n          </expand>\n          <select />\n        </subPane>\n      </pane>\n      <pane id=\"Scratches\" />\n      <pane id=\"Scope\" />\n    </panes>\n  </component>\n  <component name=\"PropertiesComponent\">\n    <property name=\"last_opened_file_path\" value=\"$PROJECT_DIR$\" />\n    <property name=\"SearchEverywhereHistoryKey\" value=\"&#9;FILE&#9;file:///home/lhy/Desktop/textualEntailenent/lstm_train.py\" />\n  </component>\n  <component name=\"RecentsManager\">\n    <key name=\"CopyFile.RECENT_KEYS\">\n    
  <recent name=\"$PROJECT_DIR$/image\" />\n    </key>\n  </component>\n  <component name=\"RunDashboard\">\n    <option name=\"ruleStates\">\n      <list>\n        <RuleState>\n          <option name=\"name\" value=\"ConfigurationTypeDashboardGroupingRule\" />\n        </RuleState>\n        <RuleState>\n          <option name=\"name\" value=\"StatusDashboardGroupingRule\" />\n        </RuleState>\n      </list>\n    </option>\n  </component>\n  <component name=\"ShelveChangesManager\" show_recycled=\"false\">\n    <option name=\"remove_strategy\" value=\"false\" />\n  </component>\n  <component name=\"TaskManager\">\n    <task active=\"true\" id=\"Default\" summary=\"Default task\">\n      <changelist id=\"e151fde8-248a-458d-a5f7-531c8b11572b\" name=\"Default\" comment=\"\" />\n      <created>1543386724429</created>\n      <option name=\"number\" value=\"Default\" />\n      <option name=\"presentableId\" value=\"Default\" />\n      <updated>1543386724429</updated>\n    </task>\n    <servers />\n  </component>\n  <component name=\"ToolWindowManager\">\n    <frame x=\"129\" y=\"490\" width=\"1366\" height=\"701\" extended-state=\"0\" />\n    <layout>\n      <window_info id=\"Project\" active=\"false\" anchor=\"left\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"true\" show_stripe_button=\"true\" weight=\"0.24963397\" sideWeight=\"0.5\" order=\"0\" side_tool=\"false\" content_ui=\"combo\" />\n      <window_info id=\"TODO\" active=\"false\" anchor=\"bottom\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.33\" sideWeight=\"0.5\" order=\"6\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Event Log\" active=\"false\" anchor=\"bottom\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.33\" sideWeight=\"0.5\" order=\"-1\" side_tool=\"true\" content_ui=\"tabs\" />\n      <window_info 
id=\"Run\" active=\"false\" anchor=\"bottom\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.33\" sideWeight=\"0.5\" order=\"2\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Version Control\" active=\"false\" anchor=\"bottom\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.33\" sideWeight=\"0.5\" order=\"-1\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Python Console\" active=\"false\" anchor=\"bottom\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.33\" sideWeight=\"0.5\" order=\"-1\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Structure\" active=\"false\" anchor=\"left\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.25\" sideWeight=\"0.5\" order=\"1\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Terminal\" active=\"false\" anchor=\"bottom\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.33\" sideWeight=\"0.5\" order=\"-1\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Debug\" active=\"false\" anchor=\"bottom\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.4\" sideWeight=\"0.5\" order=\"3\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Favorites\" active=\"false\" anchor=\"left\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.33\" sideWeight=\"0.5\" order=\"-1\" side_tool=\"true\" content_ui=\"tabs\" />\n      <window_info id=\"Data View\" active=\"false\" anchor=\"right\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" 
visible=\"false\" show_stripe_button=\"true\" weight=\"0.33\" sideWeight=\"0.5\" order=\"-1\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Cvs\" active=\"false\" anchor=\"bottom\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.25\" sideWeight=\"0.5\" order=\"4\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Hierarchy\" active=\"false\" anchor=\"right\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.25\" sideWeight=\"0.5\" order=\"2\" side_tool=\"false\" content_ui=\"combo\" />\n      <window_info id=\"Message\" active=\"false\" anchor=\"bottom\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.33\" sideWeight=\"0.5\" order=\"0\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Commander\" active=\"false\" anchor=\"right\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.4\" sideWeight=\"0.5\" order=\"0\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Find\" active=\"false\" anchor=\"bottom\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.33\" sideWeight=\"0.5\" order=\"1\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Inspection\" active=\"false\" anchor=\"bottom\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.4\" sideWeight=\"0.5\" order=\"5\" side_tool=\"false\" content_ui=\"tabs\" />\n      <window_info id=\"Ant Build\" active=\"false\" anchor=\"right\" auto_hide=\"false\" internal_type=\"DOCKED\" type=\"DOCKED\" visible=\"false\" show_stripe_button=\"true\" weight=\"0.25\" sideWeight=\"0.5\" order=\"1\" side_tool=\"false\" 
content_ui=\"tabs\" />\n    </layout>\n  </component>\n  <component name=\"VcsContentAnnotationSettings\">\n    <option name=\"myLimit\" value=\"2678400000\" />\n  </component>\n  <component name=\"XDebuggerManager\">\n    <breakpoint-manager />\n    <watches-manager />\n  </component>\n  <component name=\"editorHistoryManager\">\n    <entry file=\"file://$PROJECT_DIR$/translate_duba.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"162\">\n          <caret line=\"10\" column=\"19\" lean-forward=\"false\" selection-start-line=\"4\" selection-start-column=\"0\" selection-end-line=\"34\" selection-end-column=\"21\" />\n          <folding />\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/data/train.txt\">\n      <provider selected=\"true\" editor-type-id=\"LargeFileEditor\">\n        <state relative-caret-position=\"360\">\n          <caret line=\"548\" column=\"38\" lean-forward=\"false\" selection-start-line=\"534\" selection-start-column=\"0\" selection-end-line=\"548\" selection-end-column=\"38\" />\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/lstm_train.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"136\">\n          <caret line=\"204\" column=\"31\" lean-forward=\"false\" selection-start-line=\"204\" selection-start-column=\"31\" selection-end-line=\"204\" selection-end-column=\"31\" />\n          <folding />\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/README.md\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"243\">\n          <caret line=\"144\" column=\"0\" lean-forward=\"false\" selection-start-line=\"144\" selection-start-column=\"0\" selection-end-line=\"144\" selection-end-column=\"0\" />\n          <folding />\n        
</state>\n      </provider>\n    </entry>\n  </component>\n</project>"
  },
  {
    "path": "README.md",
    "content": "# ChineseTextualInference\nChineseTextualInference project including chinese corpus build and inferecence model, 中文文本推断项目,包括88万文本蕴含中文文本蕴含数据集的翻译与构建,基于深度学习的文本蕴含判定模型构建.\n\n# 项目介绍\n\n  文本间的推理关系，又称为文本蕴含关系 (TextualEntailment)，作为一种基本的文本间语义联系，广泛存在于自然语言文本中。简单的来说文本蕴含关系描述的是两个文本之间的推理关系，其中一个文本作为前提（premise），另一个文本作为假设（hypothesis），如果根据前提P能够推理得出假设H，那么就说P蕴含H，记做P->H,这跟一阶逻辑中的蕴含关系是类似的。  \n    目前关于文本蕴含还存在两个问题:  \n    一,中文文本蕴含数据集严重匮乏  \n    目前,关于文本蕴含的研究主要还是集中在英文,如评测中常常使用的SNLI数据集与MultiNIL:  \n    1) The Stanford Natural Language Inference (SNLI) 是斯坦福大学NLP组发布的文本蕴含识别的数据集。SNLI由人工标注的，一共包含570K个文本对，其中训练集550K，验证集10K，测试集10K，一共包含三类entailment，contradiction，neutra，上节提到的例子就是出自此数据集  \n    2) The Multi-Genre Natural Language Inference (MultiNLI)是一个众包数据集，包含433k个文本对。  \n\n   然而,在中文中,还没有出现大规模的文本蕴含数据集, CCL2018有一个文本蕴含的评测,由北京语言大学于东老师团队组织的,发布了一个数量级为10W的评测集,这是目前最大的一个文本蕴含数据集,与英文还有很大的差距。  \n   二,语言之间存在根本性差异  \n    在英文SNIL数据集中,准确率已经达到将近90%,这个准确率是在50W+数据集上得到的,而中文与英文有实质性差异,英文的文本蕴含模型无法直接应用到中文的文本蕴含当中,我们需要在中文上做技术上的PK,做本土化的创新.  
\n    \n   因此,本项目将尝试完成两个任务:  \n    一, 完成与SNLI规模相当的中文文本蕴含数据集  \n    二, 基于构建起的中文文本蕴含数据集, 尝试完成模型实验  \n\n# 项目架构\n![image](https://github.com/liuhuanyong/ChineseTextualInference/blob/master/image/project_route.png)\n\n# 中文文本蕴含数据集构建\n1,英文文本蕴含数据\n\n    A snowboarder on a wide plain of snow\tA snow field with a snowboarder on it\tentailment\n    A snowboarder on a wide plain of snow\tA snowboarder gliding over a field of snow\tneutral\n    A snowboarder on a wide plain of snow\tA snowmobile in a blizzard\tneutral\n    An older women tending to a garden.\tThe lady is cooking dinner\tcontradiction\n    An older women tending to a garden.\tThe lady is weeding her garden\tneutral\n    An older women tending to a garden.\tThe lady has a garden\tentailment\n    A man in a black shirt overlooking bike maintenance.\tA man destroys a bike.\tcontradiction\n    A man in a black shirt overlooking bike maintenance.\tA man watches bike repairs.\tentailment\n    A man in a black shirt overlooking bike maintenance.\tA man learns bike maintenance.\tneutral\n    A man in a black shirt is looking at a bike in a workshop.\tA man is wearing a red shirt\tcontradiction\n    A man in a black shirt is looking at a bike in a workshop.\tA man is in a black shirt\tentailment\n    A man in a black shirt is looking at a bike in a workshop.\tA man is deciding which bike to buy\tneutral\n\n\n\n2,中英文文本语料翻译\ntranslate_duba.py\n\n    class Translate:\n        def __init__(self):\n            return\n\n        '''获取html'''\n        def get_html(self, url):\n            headers = {\n                'User-Agent': r'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '\n                              r'Chrome/45.0.2454.85 Safari/537.36 115Browser/6.0.3',\n                'Connection': 'keep-alive'\n            }\n            req = request.Request(url, headers=headers)\n            page = request.urlopen(req).read()\n            page = page.decode('utf-8')\n            return page\n\n       
 '''解析翻译答案'''\n        def extract_answers(self, content):\n            selector = etree.HTML(content)\n            answer = selector.xpath('//div[@class=\"in-base\"]/div/div/text()')[0]\n            return answer\n\n        '''翻译主函数'''\n        def translate(self, query):\n            url = 'http://www.iciba.com/{}'.format(query)\n            html = self.get_html(url)\n            try:\n                answer = self.extract_answers(html)\n            except Exception as e:\n                answer = query\n            return answer\n\n3,翻译后中文文本蕴含数据集\n\n    一名身穿灰色T恤的男子站在一辆卡车和一棵小树的停车收费表旁边。\t这辆卡车是绿色的。\tneutral\n    摩托车排成一排，靠在一座建筑物上。\t停车场里到处都是汽车。\tcontradiction\n    一男一女在街角接吻。\t一对男女在接吻。\tentailment\n    一名身穿绿色制服，手里拿着球的足球运动员被他的一些队友举起，而另一名穿红色球衣的球员则伸手去接球。\t这位绿色球员受伤了，他的队友正在帮助他。\tneutral\n    一个男人坐在阳光下，坐在长凳上，弹着班卓琴，而一只加拿大鹅看着。\t有个人站着弹吉他。\tcontradiction\n    一个棕色头发的女人，对着麦克风唱歌。\t一个女人唱歌。\tentailment\n    一位穿着深色外套的女士正坐着，身边有许多人。\t一位女士正试图在节日举行饮食比赛。\tneutral\n    一位水泥工人正在一家服装店外的一条新人行道上工作。\t一名工人在工作。\tcontradiction\n    巴尔从后板凳上扔出莫洛托夫鸡尾酒，就像金里奇曾经做过的那样。\t金里奇和巴尔都把莫洛托夫鸡尾酒从后排扔出去了。\tentailment\n    一群儿童和成年人在树林里的一条土路上骑自行车。\t一个家庭在乡下骑自行车。\tneutral\n    两个人手拿着一根杆子在外面工作。\t两个男人在外面捕鲸。\tcontradiction\n    这是一张男人睡在墙上或冥想的照片。\t一个人在墙附近。\tentailment\n    当三个人经过时，人行道上有建筑。\t他们最近拆毁了那里的一座建筑物。\tneutral\n    老太太坐在满是鲜花的房间里。\t这位老太太正在厨房里做蛋糕。\tcontradiction\n    游泳者潜入蓝色游泳池水中。\t有一个人在水里。\tentailment\n\n\n4, 中英文文本蕴含数据集规模\n\n   | 语言类型 | 句子数 | 蕴含句子对数|\n   |:---: | :---: | :---: |\n   |中文 | 100W | 88W |\n   |英文 | 116W | 96W |\n\n\n\n# 中文文本蕴含模型实验\n本实验采用两个双向LSTM对前提Premise和假设hypothesis进行编码,最后将两个句子表征进行拼接,送入全连接层进行三分类\n1, 网络层如下:\n\n        embedding_layer = Embedding(self.VOCAB_SIZE + 1,\n                                    self.EMBEDDING_DIM,\n                                    weights=[self.embedding_matrix],\n                                    input_length=self.TIME_STAMPS,\n                                    trainable=False,\n                                    mask_zero=True)\n        left_input = 
Input(shape=(self.TIME_STAMPS,), dtype='float32')\n        right_input = Input(shape=(self.TIME_STAMPS,), dtype='float32')\n        encoded_left = embedding_layer(left_input)\n        encoded_right = embedding_layer(right_input)\n        shared_lstm = self.create_base_network(input_shape=(self.TIME_STAMPS, self.EMBEDDING_DIM))\n        left_output = shared_lstm(encoded_left)\n        right_output = shared_lstm(encoded_right)\n        merged = concatenate([left_output, right_output], axis=-1)\n        merged = Dropout(0.3)(merged)\n        merged = BatchNormalization()(merged)\n        pred = Dense(self.NUM_CLASSES, activation='softmax', name='softmax_prediction')(merged)\n        optimizer = SGD(lr=0.001, momentum=0.9)\n        model = Model(inputs=[left_input, right_input], outputs=pred)\n        model.compile(loss='categorical_crossentropy',\n                      optimizer=optimizer,\n                      metrics=['accuracy'])\n        model.summary()\n\n2, 实验结果\n\n   | 模型 | 训练集 | 测试集| 训练集准确率| 测试集准确率|\n   |:---: | :---: | :---: | :---: | :---: |\n   | Bilstm| 30w | 10W | 0.56|0.54|\n\n# 总结\n1, 本项目针对中文文本蕴含数据集数量不足的问题,提出了一个中文文本蕴含数据集,规模达到88W  \n2, 借助翻译方法进行英文中文转换,前提是英文句子较为短小,短句的翻译效果还是不错的  \n3, 原先打算使用百度API进行翻译,但是使用次数有限制,因此转而以金山毒霸代之,使用在线翻译结果  \n4, 本项目实现了一个以LSTM进行文本蕴含三分类的模型,准确率不是很高,只有0.54左右,后期还有很大的优化空间  \n\n# contact \n如有自然语言处理、知识图谱、事理图谱、社会计算、语言资源建设等问题或合作，请联系我:  \n邮箱:lhy_in_blcu@126.com  \ncsdn:https://blog.csdn.net/lhy2014  \n我的自然语言处理项目: https://liuhuanyong.github.io/  \n刘焕勇，中国科学院软件研究所，lhy_in_blcu@126.com  \n\n\n\n    \n    \n"
  },
  {
    "path": "lstm_train.py",
    "content": "#!/usr/bin/env python3\n# coding: utf-8\n# File: siamese_train.py\n# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>\n# Date: 18-5-23\n\nimport numpy as np\nfrom keras import backend as K\nfrom keras.preprocessing.sequence import pad_sequences\nfrom keras.optimizers import Adam,SGD\nfrom keras.utils import to_categorical, plot_model\nfrom keras.models import Sequential, Model, load_model\nfrom keras.layers import Embedding, Dense, Input, Dropout, Reshape, BatchNormalization, TimeDistributed, Lambda, Layer, LSTM, Bidirectional, Average, concatenate\nimport matplotlib.pyplot as plt\nimport os\nfrom collections import Counter\n\nos.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'\n\nclass SiameseNetwork:\n    def __init__(self):\n        cur = '/'.join(os.path.abspath(__file__).split('/')[:-1])\n        self.class_dict ={\n                         'neutral':0,\n                         'entailment': 1,\n                         'contradiction': 2,\n                         }\n        self.train_path = os.path.join(cur, 'data/train.txt')\n        self.test_path = os.path.join(cur, 'data/test.txt')\n        self.vocab_path = os.path.join(cur, 'model/vocab.txt')\n        self.embedding_file = os.path.join(cur, 'model/token_vec_300.bin')\n        self.model_path = os.path.join(cur, 'tokenvec_bilstm2_model.h5')\n        self.datas, self.word_dict = self.build_data()\n        self.EMBEDDING_DIM = 300\n        self.EPOCHS = 20\n        self.BATCH_SIZE = 512\n        self.LIMIT_RATE = 0.95\n        self.NUM_CLASSES = len(self.class_dict)\n        self.VOCAB_SIZE = len(self.word_dict)\n        self.TIME_STAMPS = self.select_best_length()\n        self.embedding_matrix = self.build_embedding_matrix()\n\n    '''根据样本长度,选择最佳的样本max-length'''\n    def select_best_length(self):\n        len_list = []\n        max_length = 0\n        cover_rate = 0.0\n        sent_list = set()\n        for line in open(self.train_path):\n            line = line.strip().split('\\t')\n 
           if len(line) < 3:\n                continue\n            sent1 = line[0]\n            sent2 = line[1]\n            sent_list.add(sent1)\n            sent_list.add(sent2)\n\n        for sent in sent_list:\n            sent_len = len(sent)\n            len_list.append(sent_len)\n        all_sent = len(len_list)\n        sum_length = 0\n        len_dict = Counter(len_list).most_common()\n        for i in len_dict:\n            sum_length += i[1] * i[0]\n        average_length = sum_length / all_sent\n        for i in len_dict:\n            rate = i[1] / all_sent\n            cover_rate += rate\n            if cover_rate >= self.LIMIT_RATE:\n                max_length = i[0]\n                break\n        print('average_length:', average_length)\n        print('max_length:', max_length)\n        return max_length\n\n    '''构造数据集'''\n    def build_data(self):\n        sample_y = []\n        sample_x_left = []\n        sample_x_right = []\n        vocabs = {'UNK'}\n        count = 0\n        for line in open(self.train_path):\n            line = line.rstrip().split('\\t')\n            if not line or len(line)<3:\n                continue\n            sent_left = line[0]\n            sent_right = line[1]\n            label = line[2]\n            if label not in self.class_dict:\n                continue\n            sample_x_left.append([char for char in sent_left if char])\n            sample_x_right.append([char for char in sent_right if char])\n            sample_y.append(label)\n            for char in [char for char in sent_left + sent_right if char]:\n                vocabs.add(char)\n            count += 1\n            if count%10000 == 0:\n                print(count)\n        print(len(sample_x_left), len(sample_x_right))\n        sample_x = [sample_x_left, sample_x_right]\n        datas = [sample_x, sample_y]\n        word_dict = {wd:index for index, wd in enumerate(list(vocabs))}\n        self.write_file(list(vocabs), self.vocab_path)\n        
return datas, word_dict\n\n    '''将数据转换成keras所需的格式'''\n    def modify_data(self):\n        sample_x = self.datas[0]\n        sample_y = self.datas[1]\n        sample_x_left = sample_x[0]\n        sample_x_right = sample_x[1]\n        left_x_train = [[self.word_dict[char] for char in data] for data in sample_x_left]\n        right_x_train = [[self.word_dict[char] for char in data] for data in sample_x_right]\n        y_train = [self.class_dict.get(i) for i in sample_y]\n        left_x_train = pad_sequences(left_x_train, self.TIME_STAMPS)\n        right_x_train = pad_sequences(right_x_train, self.TIME_STAMPS)\n        y_train = to_categorical(y_train, num_classes=3)\n        return left_x_train, right_x_train, y_train\n\n    '''保存字典文件'''\n    def write_file(self, wordlist, filepath):\n        with open(filepath, 'w+') as f:\n            f.write('\\n'.join(wordlist))\n\n    '''加载预训练词向量'''\n    def load_pretrained_embedding(self):\n        embeddings_dict = {}\n        with open(self.embedding_file, 'r') as f:\n            for line in f:\n                values = line.strip().split(' ')\n                if len(values) < 300:\n                    continue\n                word = values[0]\n                coefs = np.asarray(values[1:], dtype='float32')\n                embeddings_dict[word] = coefs\n        print('Found %s word vectors.' 
% len(embeddings_dict))\n        return embeddings_dict\n\n    '''加载词向量矩阵'''\n    def build_embedding_matrix(self):\n        embedding_dict = self.load_pretrained_embedding()\n        embedding_matrix = np.zeros((self.VOCAB_SIZE + 1, self.EMBEDDING_DIM))\n        for word, i in self.word_dict.items():\n            embedding_vector = embedding_dict.get(word)\n            if embedding_vector is not None:\n                embedding_matrix[i] = embedding_vector\n        return embedding_matrix\n\n    '''搭建编码层网络,用于权重共享'''\n    def create_base_network(self, input_shape):\n        input = Input(shape=input_shape)\n        lstm1 = Bidirectional(LSTM(128, return_sequences=True))(input)\n        lstm1 = Dropout(0.5)(lstm1)\n        lstm2 = Bidirectional(LSTM(64))(lstm1)\n        lstm2 = Dropout(0.5)(lstm2)\n        return Model(input, lstm2)\n\n    '''搭建网络'''\n    def bilstm_siamese_model(self):\n        embedding_layer = Embedding(self.VOCAB_SIZE + 1,\n                                    self.EMBEDDING_DIM,\n                                    weights=[self.embedding_matrix],\n                                    input_length=self.TIME_STAMPS,\n                                    trainable=False,\n                                    mask_zero=True)\n        left_input = Input(shape=(self.TIME_STAMPS,), dtype='float32')\n        right_input = Input(shape=(self.TIME_STAMPS,), dtype='float32')\n        encoded_left = embedding_layer(left_input)\n        encoded_right = embedding_layer(right_input)\n        shared_lstm = self.create_base_network(input_shape=(self.TIME_STAMPS, self.EMBEDDING_DIM))\n        left_output = shared_lstm(encoded_left)\n        right_output = shared_lstm(encoded_right)\n        merged = concatenate([left_output, right_output], axis=-1)\n        merged = Dropout(0.3)(merged)\n        merged = BatchNormalization()(merged)\n        pred = Dense(self.NUM_CLASSES, activation='softmax', name='softmax_prediction')(merged)\n        optimizer = SGD(lr=0.001, 
momentum=0.9)\n        model = Model(inputs=[left_input, right_input], outputs=pred)\n        model.compile(loss='categorical_crossentropy',\n                      optimizer=optimizer,\n                      metrics=['accuracy'])\n        model.summary()\n        return model\n\n    '''训练模型'''\n    def train_model(self):\n        left_x_train, right_x_train, y_train = self.modify_data()\n        model = self.bilstm_siamese_model()\n        history = model.fit(\n                              x=[left_x_train, right_x_train],\n                              y=y_train,\n                              validation_split=0.25,\n                              batch_size=self.BATCH_SIZE,\n                              epochs=self.EPOCHS,\n                            )\n        self.draw_train(history)\n        model.save(self.model_path)\n        return model\n\n    '''绘制训练曲线'''\n    def draw_train(self, history):\n        plt.plot(history.history['acc'])\n        plt.plot(history.history['val_acc'])\n        plt.title('Model accuracy')\n        plt.ylabel('Accuracy')\n        plt.xlabel('Epoch')\n        plt.legend(['Train', 'Test'], loc='upper left')\n        plt.show()\n        # Plot training & validation loss values\n        plt.plot(history.history['loss'])\n        plt.plot(history.history['val_loss'])\n        plt.title('Model loss')\n        plt.ylabel('Loss')\n        plt.xlabel('Epoch')\n        plt.legend(['Train', 'Test'], loc='upper left')\n        plt.show()\n\nhandler = SiameseNetwork()\nhandler.train_model()\n\n"
  },
  {
    "path": "model/vocab.txt",
    "content": "编\n念\n恺\n娘\n颈\n悬\n璜\n凄\n垂\n苛\n（\n惜\n橙\n焚\n潘\n蜈\n息\n井\n儿\n顷\n碎\n砸\n奄\n鞋\n辋\n单\n妈\n谱\n蒙\n姬\n斡\n凸\n渡\n撑\n肮\n咖\n缴\n：\n翠\n违\n兆\n晴\n乃\n呕\n懊\n雪\n沐\n趾\nO\n挚\n狈\n葛\n春\n塞\n梁\n坐\n码\n超\n吧\n棘\n萃\n窝\n日\n配\n变\n锻\n克\n蘖\n赠\nD\n搂\n骆\n笺\n司\n遮\n吁\n侦\n践\n啜\n泣\n澳\n堕\n押\n引\n扑\n蜕\n吾\n倍\n佳\n弹\n激\n辞\n般\n易\n洪\n甚\n芳\n湛\n帽\n猖\n腑\n淋\n吟\n报\n蹲\n惨\n骗\n钴\nUNK\n钠\n菲\n逐\n悼\n悖\n/\n料\n涌\n瓷\n耦\n麒\n昭\n刷\n赫\n饭\n侨\n惺\n群\n含\n夯\n庞\n募\n放\n评\n虱\n迥\n纲\n富\n迷\n疆\n撞\n宙\n芭\n蕾\n灼\n证\n越\n陌\n商\n戈\nα\n抓\n都\nU\n悔\n痴\n俊\n榈\n貂\n傲\n僚\n斗\n缕\n鲨\n险\n殖\n褶\n吵\n骸\n遗\n虔\n*\n颅\n养\n砾\n捍\n草\n啤\n獒\n匀\n曝\n序\n显\n皑\n爷\n策\n塘\n幅\n彻\n橇\n品\n还\n感\n捧\n峻\n婊\n们\n办\n悟\n用\n韧\n看\n嫉\n澡\n胃\n挤\n饪\n茂\n萧\n溅\n瑜\n蹴\n咒\n版\n玻\n摘\n轮\n酱\n周\n歪\n裙\n按\n令\nk\n返\n谁\n墓\n豪\n珠\n股\n鳌\n精\n临\n网\n\"\n所\n培\n岩\n礁\n脂\n忽\n沿\n呢\n萨\n曼\n舌\n馈\n蚓\n寮\n飙\n瑰\n鹫\n消\n愤\n屯\n钯\n瞅\n访\n睹\n泞\n堡\n怒\n潇\n斋\n娶\n棺\n雄\n胸\n骼\n魇\n效\n送\n是\n悦\n县\n生\n睫\n沛\n友\n踵\n侈\n嗡\n二\n照\n浚\n爬\n芜\n川\n怂\n侏\n集\n麟\n纷\n吼\n漾\n蚕\n对\n髋\n髦\n脾\n估\n毛\nQ\n路\n鱼\n�\n掌\n低\n它\n客\n葫\n树\n浪\n差\n猿\n跻\n？\n辈\n膺\n垫\n麽\n狙\n避\n憎\n戒\n菖\n却\n搞\n泳\n空\n翡\n箍\n廊\n愕\n蔼\n谑\n臣\n腐\n冻\n明\n艳\n帚\nv\n闩\n躬\n骄\n件\n辙\n逝\n傀\n出\n巢\n逃\n篇\n厕\n昨\n烁\n傅\n份\n惹\n肢\n桅\n荔\n酥\n枷\n接\n漱\n听\n丈\n铅\n哔\n跌\n玺\n奶\n舒\n痕\n完\n颧\n竣\n刽\n倡\n秃\n芯\n呈\n瞧\n舍\n祟\n苦\n媒\n阈\n妒\n诽\n嫁\n俏\n灯\n验\n舞\n形\n恶\n哲\n汹\n迎\n酸\n纯\n茫\n只\n大\n撕\n共\n跤\n捡\n堰\n莴\n气\n宇\n专\n腿\n崔\n痿\n畜\n屠\n绽\n跋\n跨\n军\n秒\n嘶\n咨\n购\n锋\n×\n馏\n厢\n狼\n逆\n漂\n甜\n脐\n卷\n殿\n渲\n轻\n华\n戴\n菇\n各\n扉\n扔\n艰\nœ\n直\n残\n患\n奠\n磐\n痹\n蔚\n牛\n弊\n杆\n齐\n烈\n黝\n撰\n濒\n委\n诫\n蛮\n治\n趋\n到\n努\n搡\n犀\n得\n灵\n肺\n瞳\n横\n弃\n多\n桦\n滤\n泌\n畸\n熨\n闹\n睦\n票\n牵\n昌\n堑\n攒\n砧\n阀\n蜢\nà\n麝\n廉\n石\n陀\n饮\n卖\n发\n肖\n袒\n聆\n察\n润\n刹\n见\n阂\n拓\n侵\n阐\n循\n嘉\n帘\n焙\nP\n刺\n薰\n省\n贩\n迁\n夫\n胆\n珊\n习\n鞍\n罕\n螳\n帮\n途\n拒\n木\n姜\n竿\n嗤\n唁\n脚\n冗\n咯\n孤\n邻\n扰\n忧\n怜\n此\n憩\n潴\n醚\n康\n胎\n鳍\n吉\n月\n痪\n孟\n订\n矸\n渔\n莽\n彦\n游\n砂\n沉\n邮\n琉\n长\n落\n裂\n逛\n拟\n妖\n员\n神\n略\n潜\n蚱\n佃\n?\n软\n满\n杠\n裱\n很\n极\n京\n积\n者\n假\n矶\n伯\n柴\n兢\n跚\n橘\n靶\n来\n充\n伟\n彼\n魅\n尬\n洲\n庐\n土\n权\n轩\n愁\n驴\n普\n疣\n税\n绘\n荼\n绕\n薯\n音\n虹\n臃\n埚\n雅\n议\n鲶\n犬\n私\n踮\n美\n车\n烙\n场\n读\n干\n鸟\n浃\n厦\n东\n驼\n纫\n岭\n掀\n抖\n肆\n蕊\n王\n纸\n陷\n清\n鲤\n蜂\n育\n殷\n吻\n队\n镯\n喇\n卒\n换\n钹\n忘
\n蓖\n瓮\n桐\n舔\n灰\n辱\n晤\n胫\n立\n牒\n挖\n椭\n凡\n煮\n忡\n铭\n鼬\n棠\n琼\n哮\n莱\n宏\n沥\n顿\n岳\n稠\n果\n！\n[\n卧\n鸭\n卑\n冉\n其\n嚎\n冬\n拷\n悠\n诵\n豆\n饲\n要\n采\n屋\n抄\n叽\n冯\n岸\n喋\n销\n种\n著\n延\n蛛\n酵\n罐\n智\n定\n瞩\n幢\n寂\n红\n会\n凳\n祭\n皙\n蟾\n构\n弦\n岌\n倦\n质\n晓\n缠\n本\n哨\n汽\n袜\n绿\n涨\n皇\n扎\n恼\n八\n佑\n腻\n柚\n进\n操\n奔\n猕\n栗\n谷\n柑\n阑\n卵\n砥\n禁\n蟑\n支\n鉴\n涵\n侬\n箱\n怯\n爸\n滚\n壮\n瘾\n矿\n关\n步\n审\n景\n疹\n嘘\n镶\n奕\n羚\nπ\n缉\n稽\n豁\n汗\n闺\n镖\n扼\n匕\n运\n辰\n棉\n曳\n拿\n今\n什\n捐\n协\n、\n征\n才\n徒\n骑\n冷\n谚\n盟\n绞\n规\n欺\n耕\n敞\n瘦\n萝\n溢\n欠\n拨\n驳\n慷\n塔\n柯\n腹\n氢\n页\n淑\n掺\n呃\n恢\n崖\n僧\n浓\n咽\n江\n泵\n妨\n补\n女\ng\n灿\n弟\n层\n己\n蔓\n泼\n叉\n惭\n町\n盘\n祉\n熏\n惧\n雨\n酋\n孵\nK\n鳕\n界\n苟\n矮\n熊\n待\n屹\n撇\n希\n鳞\n涤\n蜓\n侪\n凯\n耸\n觅\n盖\n术\n笃\n浊\n粼\n嘴\n绎\n亥\n墙\n间\n迅\n朝\n挨\n敬\n咄\n唾\n翻\n枕\n街\n怡\n夺\n命\nW\n雾\n涯\n蛆\n宫\n辜\n膝\n禄\n地\n核\n奉\n凰\n瑕\n疼\n啃\n锦\n赘\n胀\n谍\n媳\n雌\n蛎\n烤\n逞\n随\n吐\n争\n士\n眶\n嘿\n菅\n鸿\n6\n缄\n镭\n蝗\n题\n再\n漪\n勒\n豚\n龙\n巫\n彗\n舆\n隶\n翼\n娱\n眩\n攸\n腌\n应\n薇\n正\n如\n圣\n诟\n谈\n降\n岑\n割\n诈\n竺\nw\n弱\n伴\n盯\n阁\n笋\n枣\n敖\n好\n症\n弄\n遭\n酰\n厩\n姻\n轧\n氨\n思\n拂\n雇\n达\n寥\n炎\n$\n淇\n你\n腾\n式\n鹏\n茵\n宰\n既\n圈\n邀\n鹑\n串\n腋\n最\n法\n韵\n参\n丑\n第\n钙\n脸\n焰\n8\n腔\n巨\n睛\n愚\n伐\n剩\n档\n纱\n苗\n隆\n呼\n窑\n图\n螃\n赋\n入\n钉\n魔\n饶\n秽\n烯\n幼\n吓\n台\n门\n缎\n媚\n谐\n蔽\n虚\n谜\n键\n娅\n埔\n握\n数\n校\n糊\n犸\n绰\n伶\n蝎\n钟\n柜\n帷\n歌\n申\n和\n福\n闭\n改\n陪\n胺\n藻\n恤\n罂\n龄\n暇\n履\n1\n逾\n酌\n鳏\n旧\n焊\n匙\n属\n六\n铣\n尿\n骤\n捉\n座\nm\n夸\n火\n嘛\n赌\n睡\n找\n利\n啊\n澈\n氟\n蝉\n穴\n勋\n跑\n蒜\n尸\n秘\n武\n携\n仙\n跪\n蟹\n钢\n声\n穿\n败\n辣\n触\n惮\n苑\n失\n耿\n幌\n葱\n相\n2\n玫\n虽\n滞\n熵\n掏\n咐\n举\n退\n鸦\n栽\n求\n偶\n健\n油\nJ\n总\n寝\n嗨\n丫\n琐\n踪\n骏\n睿\n笛\n魁\n妓\n氪\n挡\n惰\nL\n帝\n戚\n睬\n役\n嫌\n溃\n供\n摸\n技\n救\n瓢\n叹\n百\n个\n耳\n窄\n脏\n沙\n震\n侍\n魏\n棚\n伎\n后\n党\n碍\n扛\n答\n欲\n欧\n瘙\n朗\n灭\n汤\n鹤\n算\n耽\n烩\n这\n哼\n寒\n注\n头\n摊\n靠\n蹒\n衷\n顾\n压\n聋\n祠\n澜\n爽\n蓿\n鸣\n严\n碌\n咏\n慰\n蹦\n贪\n瞒\n把\n4\n脯\n想\n舀\n崛\n驹\n跳\n渍\n茬\n暴\n匪\n蜻\n浸\n窃\n拾\n寄\n勺\n苯\n羯\n怕\n兮\n椎\n盏\n佛\n猴\n何\n窟\n擎\n俱\n吗\n饼\n尴\n赢\n祝\n郡\n抚\n机\n守\n符\n镁\n驭\n必\n默\n奈\n盎\n纹\n监\n赂\n亨\n狗\n拆\n族\n巡\n详\n过\n颖\n检\n止\n湿\n菌\n谕\n许\n蚣\n振\n氮\n镑\n茁\n繁\n辅\n壑\n倒\n祈\n臂\n买\n°\n青\n蜷\n鲈\n海\n互\nh\n粉\n楷\n拙\n乔\n乙\n丘\n酊\n鹌\n醛\n姊\n帆\n厉\n附\n斧\n做\n零\n镐\n主\n悄\n说\n老\n惯\n耗\n讹\n絮\n荒\n乘\n拣\n叛\n星\n《\n疱\n抬\n仪\n柏\n锌\n孙\n汪\n歉\n囱\n鹰\n
萄\n蜃\n冥\n掩\n始\n丸\n妹\n缤\n拭\n缮\n钥\n划\n凉\n坞\n虫\n胶\n拖\n溶\n么\n锐\n宠\n恐\n诊\n简\n吠\n铬\n圭\n奢\n铜\n均\n札\n戳\n固\n鳗\n模\n烦\n渥\n砺\n厄\n紊\n针\n麓\n弓\n羁\n镗\n跷\n喉\n痢\n轨\n鲍\n桥\n莎\n管\n耶\nz\n仁\n脑\n蹄\n缚\n镕\n行\n臭\n殉\n兄\n中\n连\n>\n阉\n雷\n尘\n蜜\n鹅\n戛\n身\n套\n贵\n杖\n控\n便\n楚\nr\n匠\n赴\n论\n娥\n吝\n尊\n产\n萸\n憔\n脆\n科\n疾\n酷\n炬\n邑\n然\n咆\n咫\n榨\n掉\n援\n秋\n漠\n绩\n铲\n桑\n狮\n俾\n框\n匹\n妻\n活\nE\n剃\n嗒\n刀\n吃\n合\n另\n驾\n雏\n鼻\n肿\n影\n河\n，\n霹\n…\n瓦\n苏\n赞\n梭\n可\n佐\n趁\n愉\n帅\n兰\n户\n像\n谅\n苹\n究\n索\n曲\n功\n言\n蔡\n尖\n驶\n肤\n莲\n戟\n伍\n烷\n粘\n桔\n院\n性\n徊\n纬\n鼠\n手\n炫\n源\n叮\n霾\n悯\n描\n训\n扶\n霉\n光\n枉\n危\n狭\n纠\nI\n缩\n遏\n取\n蒸\n鳟\n锥\n酗\n妾\n缸\n蹩\n欢\n杭\n害\n赡\n衬\n册\n匿\n斩\n研\n冤\n颌\n猪\n坦\n刮\n泽\n胛\n奖\n护\n贷\n劝\n猩\n悍\n炉\n氏\n怀\n室\n歹\n储\n赦\n崩\n增\n噩\n几\n安\n凝\n奇\n淤\n尝\n七\n晾\n城\n煤\n梅\n硅\n现\n叭\n郁\n穹\n牡\n醋\n瘤\n伪\n擂\n郎\n偿\n椰\n蜍\n浏\n伙\n透\n泰\n鹿\n池\n昏\n肃\n怖\n政\n或\n秤\n薄\n绥\n烬\n肉\n锚\n葩\n锹\n~\n睐\n酿\n野\n诗\n函\n苇\n程\n藓\n隧\n棱\n坂\n菜\n扫\n花\n孝\n缓\n碾\n愿\n熟\n整\n拍\n慕\n刘\n母\n纳\n瑙\n圳\n缀\n敛\n拱\n俑\n妮\n谘\n露\n硬\n龛\n酝\n昙\n晕\nX\n槃\n髻\n蛤\n并\n筹\n与\n别\n映\n扁\n沮\n疵\n颚\n诡\n由\n拉\n莓\n岱\n箴\n减\n替\n豌\n稻\n亮\n懒\n献\n棒\n牺\n跃\n獾\n栩\n凹\n迹\n戮\n醒\n市\n医\n剖\n坏\n蜗\n半\n捷\n结\n解\n靡\n团\n迄\n5\n:\n芒\n表\n印\n煦\n鹕\n诣\n输\n屁\n阵\n停\nó\n托\n俘\n彬\n巅\n券\n练\n昂\n祗\n漫\n窗\n猝\n撤\n脊\n鸡\n澎\n民\n瓶\n矛\n九\n歼\n峡\n镜\n登\n朴\n债\n讯\n傻\n扯\n阻\n缈\n钻\n灶\n襟\n尤\n谧\n诙\n3\n鄙\n柠\n望\n圆\n须\n洗\n膏\n陡\nY\n滋\n萼\n亭\n肓\n药\n羔\n厅\n馀\n永\n溪\n竞\n妄\n怪\n卤\n鞑\n屈\n亩\n芹\n磷\n荚\n藤\n挥\n黑\n宾\n赏\n掘\n调\n玉\n售\n顽\n奸\n邱\n挂\n搓\n槲\n蛙\n呐\n啉\n眨\n陛\n惠\n鲱\n仑\n缺\n俩\n吕\n毒\n丰\n屉\n轴\n馁\n聚\n痛\n涕\n猎\n喱\n狂\n密\n榕\n沌\n膨\n&\n额\n础\n峙\n肥\n蟀\n蛇\n蛭\n亿\n桉\n嚏\n类\n笆\n成\n痨\n诿\n涸\n汀\n毅\n业\n釉\n绸\n淘\n贫\n官\n李\n啡\n夕\n讨\n炙\nc\n蚂\n谋\n剑\n雳\n距\n眺\n信\n反\n少\n阶\n奎\n喧\n镳\n乍\n汲\n巧\n昼\n阔\n胁\n愈\n蕉\n黛\n萦\n扩\n尾\n螯\n镍\n拴\n卸\n剪\n偷\n梨\n嚼\n槽\n准\n烂\n等\n瓣\n银\n裔\n双\n罩\n因\n氰\n衍\n滩\n喔\n彭\n悴\n菊\n状\n隔\n边\n传\n为\n至\n缘\n噱\n逍\n塑\n将\n妆\n烘\n劾\n颐\n酶\n缆\n迪\n鬣\n邓\n义\n擅\n嵌\n型\n颇\n否\n剥\n坩\n着\n藏\n翁\n葬\n蚤\n凌\n纵\n韩\n荟\n话\n陆\n幕\n膜\n辘\n蝠\n颊\n展\n点\n腕\n啁\n=\n局\n咧\n牙\n姨\n粪\n久\n量\n崭\n滔\n袱\n贾\n杉\n递\n勃\n那\n钝\n槌\n端\n皱\n鹭\n吹\nV\n黯\n玄\n浑\n晦\n淌\n艮\n筷\n较\n玩\n向\n渠\n丝\n茸\n覆\n泛\n博\n泪\n僻\n婉\n适\n崎\n磋\n啾\n糖\n链\n称\n坠\n搜\n淡\n样\n投\n味\n祷\n疑\n蔑\n咎\n府\n迦\n慌\n藐\n罔\n坚\n帜\
n侄\n披\n颠\n铆\n妊\n洞\n宛\n垩\n幻\n巴\n装\n劫\n流\n掐\n攫\n据\n拐\n析\n菱\n色\n饵\n詹\n疤\n惩\n骨\n曹\n诃\n泻\n寓\n错\n踩\n暹\n姓\n滕\n皿\n奏\n屡\n卿\n革\n锢\n温\n络\n偎\n兑\n哝\n了\n敌\n咕\n颂\n咙\n秆\n营\ne\n艺\n抒\n指\n炖\nA\n语\n夜\n咬\n%\n陋\n蜒\n米\n秀\na\n狠\n财\n词\n插\n艘\n锄\n够\n宅\n醇\n去\n聘\n枢\n遢\n物\n绒\n猜\n叫\n同\n稀\n戎\n建\n硝\n器\n浇\n宿\n卉\n呀\n圃\nu\n烹\n填\n铎\n瀚\n诠\n垮\n诱\n梦\n牌\n倾\n课\n异\n淆\n洛\n制\n豹\n派\n授\n诬\n乎\n杜\n聿\n居\n逗\n腥\n泡\n慧\n开\n蚊\n蜡\n忆\n兹\n块\n坟\n恰\n趴\n墨\n下\n卡\n染\n碟\n嗑\n乞\n脓\n通\n之\n缝\n孕\n秸\n鹊\n提\n倚\n狒\n喵\n陶\n宝\n瀑\n肌\n乡\n疫\n纂\n悉\n启\n羹\n髓\n脖\n刊\n喻\n蹼\n!\n扒\n簿\n小\n经\n恨\n潺\n先\n炒\n蛀\n捏\n季\n€\n痰\n颓\n厘\n起\n铝\n鼩\n庭\n黄\n骇\n暂\n祸\n湍\n卦\n噬\n枚\n盆\n诩\n轭\n慑\n袤\n嗯\n逊\n跆\n伏\n荐\n贬\n瑚\n躯\n询\n芥\n唆\n褐\n刑\n咱\n蔗\n氛\n瓜\n渴\n酯\n剽\nb\n屑\n抛\n喙\n枫\n遥\n事\n寸\n甲\n禅\n钞\n孚\n匮\n麦\n跟\n嚣\n栈\n工\n善\n；\n焉\n细\n邃\n冶\n捻\n鞠\n摔\n领\n毡\n粟\n邋\n锑\n航\n济\n狐\n簇\n餐\n搅\n洋\n迈\nx\n噻\n作\n惫\n萌\nG\n榴\n家\n胰\n匣\n赔\nT\n蜘\n聊\n挽\n薪\n邸\n丐\n也\n溜\n膛\n淬\n憬\n液\n朱\n瞥\n徐\n顶\n肩\n吩\n瑞\n筋\n西\n钛\n拜\n站\n虾\n揍\n峭\n瞬\n9\n板\n咪\n冠\n签\n存\n勿\n捆\n葡\n煎\n旦\n瘟\n锣\n斤\n三\n褒\n抉\n讳\n镀\n轿\n朽\n交\n畔\n癫\n内\n栓\n蒲\n礼\n膳\n香\n古\n呎\n堆\n遍\n腭\n攘\n琢\n嘱\n爵\n抱\n径\n坛\n速\n渐\n迟\n吱\n铉\n贺\n享\n肘\n氦\n槟\n翩\n焕\n肇\n闪\n遛\n疯\n坷\n素\n铛\n乱\n津\n痊\n恍\n榻\n朋\n响\n范\n价\n顺\n涓\n燃\n竖\n况\n释\n爹\n獭\n深\n榷\n畴\n衣\n社\n龟\n雀\n灌\n骰\n颤\n瘪\nj\n燥\n牢\n娄\n球\n走\n霜\n晃\n喳\n亵\n劳\nC\n喜\n恭\n众\n州\n续\n钩\n牧\n衰\n际\n请\n候\n字\n林\n借\n斑\n萎\n锡\n碳\n躏\n常\n址\n粮\n掷\n仅\n嘟\n追\n侠\n案\n栅\n汇\n竭\n凭\n确\n寐\n爪\n又\n浆\n潭\n涩\n》\n鲷\n幽\n糕\n驯\n匾\n颜\n翔\n击\n斜\n硕\n肋\n考\n塌\n荡\n屏\n嘎\n袭\n枪\n汉\n德\nf\n碑\n疸\n於\n拽\n伽\n刻\n伤\n潢\n从\n姿\n斓\n煌\n联\n隋\n冰\n娩\n叟\n汁\n净\n嗜\n择\n逢\n文\n山\n槛\n祖\n哀\n炊\n芙\nl\n狸\n杏\n胡\n涂\n卟\n瞌\n劣\n督\n务\n货\n鲁\n丹\n蚁\n杂\n渗\n厂\n晚\n扭\n鼓\né\n噢\n琶\n鲭\n竟\n认\n漏\n胳\n敢\n啪\n栏\n冀\n宗\n朔\n怨\n录\n讫\n趟\n殆\n预\n盥\n裕\n舰\n驰\n亡\n志\n鹉\n号\n岖\n拄\no\n鱲\n飞\n湃\n厨\n郊\n啄\n弈\nR\n腊\n村\n钾\n杨\n羊\n带\n人\n沼\n污\n尼\n鲜\n檀\n甸\n冈\n皮\ny\n涉\n磁\n舟\n挝\n封\n特\n蜊\n溺\n眉\n瘠\n甄\n麸\n簧\n愧\n倪\n瘩\n惕\n万\n哗\n铰\n搁\n飘\n窖\n尹\n仇\n饰\n不\n绳\n篝\n助\n乌\n琥\n扬\n殴\n墅\n睽\n忙\n以\n馆\n耙\n挺\n迭\n予\n惚\n幔\n服\n徽\n织\n嘻\n椹\n笔\n穆\n橄\n疮\n胧\n痤\n酪\n纤\n绪\n扮\n浮\n裁\n皆\n儒\n碇\n茎\n饺\n驱\n于\n奂\n田\n铐\n雕\n疲\n趣\n账\n帑\n孩\n战\n船\n檐\n沃\n垛\n写\n例\n拇\n宋\n畏\n喝\n根\n势\n叠\n恕\n贱\n非\n兵\n苍\n设\n恋\n缔\n臀\n讽\n喃
\n态\n吞\n巩\n腱\n峦\n外\n鱿\n沫\n荃\n盒\n舶\n蛾\n钓\n浣\n吸\n殊\n容\n近\n意\n踱\n袖\n情\n嶙\n讼\n哎\n统\n觎\n嘈\n缛\n松\n里\n惊\n毫\n麋\n窍\n熬\n讲\n楔\n夷\n自\n十\n公\n攀\n焦\n虑\n滥\n踝\nt\n嗝\n淀\n洽\n慈\n陈\n坍\n杰\n茱\n誉\n寡\n昵\n埃\n咸\n岛\n黎\n玷\n束\n荷\n樟\n胜\n强\n乓\n侃\n房\n囤\n屎\n宪\n礴\n左\nN\n漆\n扣\n蝇\n蠕\n柬\n涟\n离\n轰\n体\n裹\n梯\n俗\n俚\n诋\n踢\n叙\n诀\n罗\n呵\n篱\n翘\n蘑\n禽\n咔\n谎\n的\n摇\n云\n亦\n南\n凑\n匆\n揭\n矗\n袄\n农\n施\n啼\n伸\n两\n给\n寿\n赛\n榔\n牦\n闲\n莫\n在\n熔\n闸\n颁\n麻\n殡\n堂\n教\n笑\n）\n蓬\n鳄\n楼\n脉\n忌\nH\n藉\n棍\n犯\n约\n湾\n企\n弗\n栋\nZ\n萤\n升\n底\n慢\n谴\n血\n爱\n郭\n坝\n竹\n犊\n记\n董\n亏\n店\n枝\n融\n泊\n级\n鬃\n需\n蕨\n锤\n犷\n醉\n笨\n0\n眼\n窒\n煽\n恙\n肝\n召\n粗\n泥\n谭\n侣\n犹\n淫\n孜\n男\n酒\n霍\n被\n抹\n堤\n俸\n砷\n邦\n(\n创\n笞\n茴\n棋\n担\n搬\n肠\n漓\n聪\n谟\n胞\n笼\n措\n钚\n寺\n舱\n太\n丧\n境\n窜\n禧\n裘\n乒\n马\n褴\n年\n冒\n睁\n侯\n柱\n床\n君\n背\n蜴\n子\n畅\n懦\n籍\n澄\n唯\n萍\n狱\n凿\n]\n千\n除\n蠢\n鄂\ni\n饷\n载\n瘫\n仔\n翱\n敷\n岗\n陨\n潦\n跛\n胱\n位\n纽\n裸\np\n耘\n嗓\n园\n复\n荆\n椅\n撮\n筝\n艇\n拮\n妞\n渺\n扇\n贸\n昧\n废\n捕\n贴\n缭\n妃\n哑\n铂\n簸\n处\n_\n天\n导\n诸\n嬉\n婪\n赐\n谤\n批\n墩\n猫\n岔\n榄\n逻\n溴\n闻\n陵\n粱\n钦\n祥\n责\n毗\n届\n噜\n使\n羡\n赖\n逼\n坎\n良\n锯\n误\n碉\n菠\n抵\n列\n绷\n桌\n滨\n线\n徙\n裆\n款\n睑\n上\n纪\n沾\n俭\n哪\n一\n蜱\n硼\n芬\n吴\n捞\n热\n鹈\n敦\n觉\n哩\n挛\n挪\n骷\n部\n屿\n肾\n电\n砰\n迂\n涛\n蚀\n棕\n魂\n惑\n戏\n观\n判\n选\n捅\n弧\n段\n蹂\n碰\n您\n维\n港\n仗\n伦\n妥\n渊\n新\n租\n躁\n诲\n介\n则\n拳\n劈\n磡\n蓄\n踏\n痒\n回\n测\n弥\n啸\n让\n搭\n憧\n圾\n契\n谬\n躲\n)\n绌\n而\n紧\n螂\n短\n诚\n囚\n鞘\n荣\n哄\n搪\n铸\n炮\n婴\n瞻\n逸\n卫\n拥\n资\n住\n视\n茶\n砍\n绊\n演\n辨\n休\n班\n辛\n蒋\n问\n格\n泄\n旱\n酚\n频\n螺\n撼\n勇\n鳃\n示\n波\n备\n贡\n唠\n砖\n励\n揽\n纾\n洁\n典\n丁\n拯\n北\n咀\n亟\n彩\n琳\n阪\n率\n壳\n障\n口\n代\n诺\n褪\n域\n癣\n茨\n稳\n靛\n兔\n嗽\n折\n橱\n仆\n负\n痘\n谊\n前\n赚\n奥\n罄\n琵\n桃\n唉\n壶\n全\n侮\n燕\n括\n述\n英\n婆\n抑\n辩\n挠\n渎\n绚\n溯\n辽\n喘\n绍\n旗\n蟋\n浩\n赁\n番\n铠\n识\n哇\n龈\n捣\n帖\n截\n没\n兴\n佼\n终\n痔\n散\n撒\n诅\n蹬\n仓\n嗅\n昆\n疙\n计\n-\n标\n试\n疟\n瘸\n瞪\n原\n肽\n拔\n暑\n罚\n矩\n告\n靼\n鬼\n唬\n继\n弋\n杯\n僵\n炼\n喷\n韦\n粥\n啬\n阳\n次\n喹\n胖\n力\n亚\n溉\n噪\ns\n困\n材\n锈\n布\n硫\n贝\n勤\n囊\n暧\n兽\n歧\n芝\n浦\n讣\n度\n凶\n刚\n但\n疗\n留\n蓉\n辆\n茜\n句\n。\n旬\n珍\n虎\n疽\n系\n故\n且\n蜥\n环\n膊\n娃\n蜚\n鞭\n贻\n爆\n绉\n谓\n桶\n逮\n猥\n宵\n酩\n晶\n蔻\n堵\n泉\n四\n鹃\n籽\n御\n睾\n寻\n彰\n甘\n癌\n喂\n赎\n藕\n宣\n碱\n熄\n占\n师\n椒\n夏\n苔\n腺\n怎\n挑\n森\n转\n五\n暖\n绅\n玛\n毕\n暗\n矫\n粒\n骡\n组\n痉\n巾\n朵\n姑\n架\n桩\n混\n恩\n糯\n修\n动\n推\n剧\n伊\n埋\n
霓\n讪\n汰\n切\n累\n肚\n敏\n柿\n歇\n尚\n镇\n锁\n仍\n曙\n铧\n妇\n乐\n霸\n拢\n足\n烧\n遂\n瘀\n琪\n漩\n缥\n浅\n学\n舷\n仿\n榜\n早\n已\n父\n损\n致\n置\n骚\n辖\n译\n莹\n辑\n掠\n优\n膀\n洒\n览\n桁\n媲\n,\n加\nS\n具\n午\n绵\n芦\n赤\n狡\n绝\n哈\n旨\n宽\n饯\n氧\n余\n哺\n吊\n挫\n首\n丢\n黏\n平\n橡\n惋\n痫\n茅\n斐\n史\n荫\n舵\n翅\n打\n抽\n决\n秩\n奴\n搏\nd\n缅\n拘\n灾\n姐\n淹\n贤\n静\n蝙\n壕\n佣\n茄\n樱\n寞\n锂\n综\n象\n尺\n蔬\n宁\n当\n撬\n榉\n \n瞰\n苣\n哦\n昔\n侥\n伞\n仲\n旺\n蹭\n庄\n贞\n霆\n诞\n暮\n厥\n浴\n遵\n褛\n谨\n吮\n崽\n真\n扳\n庙\n袂\n悲\n筏\n摩\n梧\n劲\n戊\n酮\n筛\n鼹\n宜\n突\n未\n刁\n衫\n铃\n探\n高\n画\n弯\n柳\n坊\n益\n魄\n童\n叶\n沟\n丙\n狄\n羞\n汶\n‘\n遇\n隙\n免\n寨\n厚\n呛\n她\n袋\n楞\n摧\n翰\n凛\n偏\n尧\n铁\n妙\n乏\n查\n骂\n食\n防\n蕴\n晋\n檬\n梵\n躺\n央\n崇\n庇\n执\n汐\n嫩\n纶\n褥\n芽\n邪\n饿\n钱\n贯\n白\n栖\n姆\n刃\n重\n叨\n耍\n饥\n知\n阿\n盲\n弩\n条\n历\n贼\n快\n书\n’\n勾\n杀\n阅\n难\n馅\n阱\n徘\n飓\n谦\n耆\n瞎\n有\n讶\n破\n些\n氯\n承\n篷\n章\n嘲\n莺\n紫\n茉\n比\n帧\n蹈\n羽\n他\n垒\n某\n促\n虐\n币\n蛋\n铀\n棵\n坡\n佬\n蟒\n盾\n羟\n庸\n巷\n金\n鼎\n赶\n乳\n娜\n娇\n催\n闷\n粹\n靖\n镰\n啦\n盛\n帕\n国\n闯\n髅\n菩\n往\n毯\n瑟\n烟\n孢\n唤\n心\n烛\n裤\n独\n宴\n及\n捂\n桂\n挣\n元\n+\n削\n儡\n癖\n脱\n每\n依\n哭\n苜\n叔\n剔\n隘\n世\n勉\n卢\n分\n似\n尽\n驻\n碧\n胭\n鸥\n漉\n急\n更\n化\n@\n蚯\n项\n糙\n刨\n齿\n肟\n鞣\n亲\n包\n纺\n稿\n剂\n初\n旁\n唧\n篓\n婿\n遣\n豫\n疚\n蝴\n箔\n目\n阴\n沦\n阡\n胬\n时\n归\n擦\nγ\n欣\n钳\n拌\n滑\n招\n稚\n觑\n傍\n滴\ní\n受\n尔\n螨\n袍\n炽\n滓\n衅\n孽\n抢\n壁\n株\n若\n能\n琴\n堪\n夭\n甩\n署\n侧\n罪\n酬\n植\n糟\n卓\n毙\n盼\n咳\n氓\n保\n丽\n.\n斯\n喊\n断\n茧\n7\n·\n呻\n乖\n我\n皂\n忠\n涅\n辉\n凤\n拧\n辫\n垢\n拼\n毁\n穗\n方\n荧\n稣\n孔\n微\n鹳\n敲\n猛\n丛\n”\n水\n肪\n俯\n腰\n苞\n旋\n铺\n期\n蒂\n梢\n面\n盔\n沸\n造\n琦\n磨\n枯\n就\n梳\n郑\n远\n限\n席\n钮\n绑\n辊\n慎\n榆\n憾\n即\n盗\n幸\n柄\n警\n拦\n衔\n碗\n围\n哥\n搐\n该\n卜\n冕\n锅\n垦\n概\n道\n峋\n衡\n眯\n费\n肯\n璃\n洼\n实\n厌\n饱\n唇\n牲\n允\n夹\n蜿\n岁\n威\nn\n馨\n涎\n恿\n肛\n蚌\n库\n懂\n娴\n无\n稼\n械\n貌\n榭\n孪\n坯\n坑\n壤\n桨\n耀\n湖\n恳\n篮\n冲\nB\n区\n抨\n珀\n獗\n“\n誓\n慨\n肴\n缪\n缰\n蘸\n瞄\n俄\n帐\n绣\n猾\n刍\n峰\n盐\n盈\n毋\n艾\n蝶\n呜\n奋\n烫\nF\nq\n仰\n汞\n娠\n盹\n旷\n窦\n褂\n忏\n狩\n鹦\n雹\n沓\n坪\n筑\n律\n腓\n吨\n右\n值\n基\n祀\n鸽\n莉\n晰\n收\n贮\n排\n莪\n名\n迫\n赃\n抗\n摄\n罢\n节\n啮\n鼾\n任\n播\n梗\n吭\n蓝\n尉\n箭\n风\n孀\n垃\n垄\n仕\n呆\n辟\n晒\n蓟\n持\n砌\n廷\n付\n死\n片\n删\n磺\n角\n篡\n斥\n芋\n移\n熙\nM\n射\n筐\n职\n懈\n虏\n旅\n喀\n疏\n磅\n靴\n广\n醺\n玲\n楠\n摒\n渣\n憋\n弛\n摆\n理\n潮\n辐\n悚\n觊\n曾\n兜\n获\n雁\n鸵\n眠\n谣\n张\n兼\n炸\n伺\n胥\n葵\n涡\n忍\n唐\n庆\n唱\n柔\n婚\n窥\n稍\n筒\n病\n攻\n廖\n墟\n末\
n颗\n朦\n鲸\n隐\n犁\n叼\n勘\n胚\n枭\n鲑\n辍\n皈\n轶\n添\n贿\n诉\n穷\n晨\n廓\n揉\n匈\n炭\n耻\n耐\n恒\n佩\n副\n硒\n晷\n谢\n哟\n黜\n牟"
  },
  {
    "path": "translate_duba.py",
    "content": "\nfrom urllib import request\nfrom lxml import etree\n\nclass Translate:\n    def __init__(self):\n        return\n\n    '''获取html'''\n    def get_html(self, url):\n        headers = {\n            'User-Agent': r'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '\n                          r'Chrome/45.0.2454.85 Safari/537.36 115Browser/6.0.3',\n            'Connection': 'keep-alive'\n        }\n        req = request.Request(url, headers=headers)\n        page = request.urlopen(req).read()\n        page = page.decode('utf-8')\n        return page\n\n    '''解析翻译答案'''\n    def extract_answers(self, content):\n        selector = etree.HTML(content)\n        answer = selector.xpath('//div[@class=\"in-base\"]/div/div/text()')[0]\n        return answer\n\n    '''翻译主函数'''\n    def translate(self, query):\n        url = 'http://www.iciba.com/{}'.format(query)\n        html = self.get_html(url)\n        try:\n            answer = self.extract_answers(html)\n        except Exception as e:\n            answer = query\n        return answer\n\n\nif __name__ == '__main__':\n    handler = Translate()\n    while 1:\n        query = input('entere an sent to translate:')\n        res = handler.translate(query)\n        print(res)\n"
  }
]