Repository: 425776024/bertsum-chinese Branch: master Commit: 7e44258b93ea Files: 77 Total size: 61.9 MB Directory structure: gitextract_oy6ipegf/ ├── .gitignore ├── .idea/ │ ├── .gitignore │ ├── bertsum-chinese.iml │ ├── inspectionProfiles/ │ │ ├── Project_Default.xml │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── README.md ├── bert-chinese-web/ │ ├── .idea/ │ │ ├── .gitignore │ │ ├── bert-chinese-web.iml │ │ ├── inspectionProfiles/ │ │ │ ├── Project_Default.xml │ │ │ └── profiles_settings.xml │ │ ├── misc.xml │ │ └── modules.xml │ ├── README.md │ ├── bert-base-chinese/ │ │ ├── config.json │ │ └── vocab.txt │ ├── config.py │ ├── predict.py │ ├── src/ │ │ ├── models/ │ │ │ ├── __init__.py │ │ │ ├── encoder.py │ │ │ ├── model_builder_LAI.py │ │ │ ├── neural.py │ │ │ ├── optimizers.py │ │ │ └── rnn.py │ │ ├── others/ │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ └── prepro/ │ │ ├── __init__.py │ │ └── data_builder.py │ ├── templates/ │ │ └── index.html │ └── web_main.py ├── bert-sum-dataprocess/ │ ├── .idea/ │ │ ├── .gitignore │ │ ├── bert-sum-dataprocess.iml │ │ ├── inspectionProfiles/ │ │ │ ├── Project_Default.xml │ │ │ └── profiles_settings.xml │ │ ├── misc.xml │ │ └── modules.xml │ ├── README.md │ ├── data/ │ │ └── scope.csv │ ├── json_data/ │ │ ├── LCSTS.test.0.json │ │ ├── LCSTS.train.0.json │ │ └── scope.train.chunk_size_1.0.json │ ├── main.py │ └── src/ │ ├── __init__.py │ └── utils.py └── bertsum-chinese/ ├── .idea/ │ ├── .gitignore │ ├── bertsum-chinese.iml │ ├── inspectionProfiles/ │ │ ├── Project_Default.xml │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── LICENSE ├── README.md ├── args_config.py ├── bert_data/ │ └── LCSTS.train.1.bert.pt ├── json_data/ │ └── LCSTS.train.1.json ├── logs/ │ └── bert_classifier ├── preprocess_LAI.py ├── requirements.txt ├── src/ │ ├── __init__.py │ ├── models/ │ │ ├── __init__.py │ │ ├── data_loader.py │ │ ├── encoder.py │ │ ├── model_builder_LAI.py │ │ ├── 
neural.py │ │ ├── optimizers.py │ │ ├── rnn.py │ │ └── trainer.py │ ├── others/ │ │ ├── __init__.py │ │ ├── logging.py │ │ ├── statistical.py │ │ └── utils.py │ └── prepro/ │ ├── __init__.py │ └── data_builder_LAI.py └── train_LAI.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints/ *.ipynb # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. 
#Pipfile.lock # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # Mac .DS_Store ================================================ FILE: .idea/.gitignore ================================================ # Default ignored files /shelf/ /workspace.xml ================================================ FILE: .idea/bertsum-chinese.iml ================================================ ================================================ FILE: .idea/inspectionProfiles/Project_Default.xml ================================================ ================================================ FILE: .idea/inspectionProfiles/profiles_settings.xml ================================================ ================================================ FILE: .idea/misc.xml ================================================ ================================================ FILE: .idea/modules.xml ================================================ ================================================ FILE: .idea/vcs.xml ================================================ ================================================ FILE: README.md ================================================ # BERTSUM中文摘要抽取代码 **搬砖不易,欢迎star** - bert-chinese-web//web小接口,可以浏览器中展示 - bert-sum-dataprocess//数据处理 - bertsum-chinese//模型训练 ================================================ FILE: bert-chinese-web/.idea/.gitignore ================================================ # Default ignored files /shelf/ /workspace.xml ================================================ FILE: bert-chinese-web/.idea/bert-chinese-web.iml ================================================ ================================================ FILE: 
bert-chinese-web/.idea/inspectionProfiles/Project_Default.xml ================================================ ================================================ FILE: bert-chinese-web/.idea/inspectionProfiles/profiles_settings.xml ================================================ ================================================ FILE: bert-chinese-web/.idea/misc.xml ================================================ ================================================ FILE: bert-chinese-web/.idea/modules.xml ================================================ ================================================ FILE: bert-chinese-web/README.md ================================================ # 抽取式文本摘要模型bertsum,接口部署 (config.py下配置,放好模型) 运行web_main.py,启动http接口 ``` request:{ url : ip/api_summary type: post doc : '原始文本' } return:{ 摘要文本 } ``` ================================================ FILE: bert-chinese-web/bert-base-chinese/config.json ================================================ { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } ================================================ FILE: bert-chinese-web/bert-base-chinese/vocab.txt ================================================ [PAD] [unused1] [unused2] [unused3] [unused4] [unused5] [unused6] [unused7] [unused8] [unused9] [unused10] [unused11] [unused12] [unused13] [unused14] [unused15] [unused16] [unused17] [unused18] [unused19] [unused20] [unused21] [unused22] [unused23] [unused24] [unused25] [unused26] [unused27] [unused28] [unused29] [unused30] [unused31] [unused32] [unused33] [unused34] [unused35] [unused36] 
[unused37] [unused38] [unused39] [unused40] [unused41] [unused42] [unused43] [unused44] [unused45] [unused46] [unused47] [unused48] [unused49] [unused50] [unused51] [unused52] [unused53] [unused54] [unused55] [unused56] [unused57] [unused58] [unused59] [unused60] [unused61] [unused62] [unused63] [unused64] [unused65] [unused66] [unused67] [unused68] [unused69] [unused70] [unused71] [unused72] [unused73] [unused74] [unused75] [unused76] [unused77] [unused78] [unused79] [unused80] [unused81] [unused82] [unused83] [unused84] [unused85] [unused86] [unused87] [unused88] [unused89] [unused90] [unused91] [unused92] [unused93] [unused94] [unused95] [unused96] [unused97] [unused98] [unused99] [UNK] [CLS] [SEP] [MASK] <S> <T> ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ [ \ ] ^ _ a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ £ ¤ ¥ § © « ® ° ± ² ³ µ · ¹ º » ¼ × ß æ ÷ ø đ ŋ ɔ ə ɡ ʰ ˇ ˈ ˊ ˋ ˍ ː ˙ ˚ ˢ α β γ δ ε η θ ι κ λ μ ν ο π ρ ς σ τ υ φ χ ψ ω а б в г д е ж з и к л м н о п р с т у ф х ц ч ш ы ь я і ا ب ة ت د ر س ع ل م ن ه و ي ۩ ก ง น ม ย ร อ า เ ๑ ་ ღ ᄀ ᄁ ᄂ ᄃ ᄅ ᄆ ᄇ ᄈ ᄉ ᄋ ᄌ ᄎ ᄏ ᄐ ᄑ ᄒ ᅡ ᅢ ᅣ ᅥ ᅦ ᅧ ᅨ ᅩ ᅪ ᅬ ᅭ ᅮ ᅯ ᅲ ᅳ ᅴ ᅵ ᆨ ᆫ ᆯ ᆷ ᆸ ᆺ ᆻ ᆼ ᗜ ᵃ ᵉ ᵍ ᵏ ᵐ ᵒ ᵘ ‖ „ † • ‥ ‧ 
 ‰ ′ ″ ‹ › ※ ‿ ⁄ ⁱ ⁺ ⁿ ₁ ₂ ₃ ₄ € ℃ № ™ ⅰ ⅱ ⅲ ⅳ ⅴ ← ↑ → ↓ ↔ ↗ ↘ ⇒ ∀ − ∕ ∙ √ ∞ ∟ ∠ ∣ ∥ ∩ ∮ ∶ ∼ ∽ ≈ ≒ ≡ ≤ ≥ ≦ ≧ ≪ ≫ ⊙ ⋅ ⋈ ⋯ ⌒ ① ② ③ ④ ⑤ ⑥ ⑦ ⑧ ⑨ ⑩ ⑴ ⑵ ⑶ ⑷ ⑸ ⒈ ⒉ ⒊ ⒋ ⓒ ⓔ ⓘ ─ ━ │ ┃ ┅ ┆ ┊ ┌ └ ├ ┣ ═ ║ ╚ ╞ ╠ ╭ ╮ ╯ ╰ ╱ ╳ ▂ ▃ ▅ ▇ █ ▉ ▋ ▌ ▍ ▎ ■ □ ▪ ▫ ▬ ▲ △ ▶ ► ▼ ▽ ◆ ◇ ○ ◎ ● ◕ ◠ ◢ ◤ ☀ ★ ☆ ☕ ☞ ☺ ☼ ♀ ♂ ♠ ♡ ♣ ♥ ♦ ♪ ♫ ♬ ✈ ✔ ✕ ✖ ✦ ✨ ✪ ✰ ✿ ❀ ❤ ➜ ➤ ⦿ 、 。 〃 々 〇 〈 〉 《 》 「 」 『 』 【 】 〓 〔 〕 〖 〗 〜 〝 〞 ぁ あ ぃ い う ぇ え お か き く け こ さ し す せ そ た ち っ つ て と な に ぬ ね の は ひ ふ へ ほ ま み む め も ゃ や ゅ ゆ ょ よ ら り る れ ろ わ を ん ゜ ゝ ァ ア ィ イ ゥ ウ ェ エ ォ オ カ キ ク ケ コ サ シ ス セ ソ タ チ ッ ツ テ ト ナ ニ ヌ ネ ノ ハ ヒ フ ヘ ホ マ ミ ム メ モ ャ ヤ ュ ユ ョ ヨ ラ リ ル レ ロ ワ ヲ ン ヶ ・ ー ヽ ㄅ ㄆ ㄇ ㄉ ㄋ ㄌ ㄍ ㄎ ㄏ ㄒ ㄚ ㄛ ㄞ ㄟ ㄢ ㄤ ㄥ ㄧ ㄨ ㆍ ㈦ ㊣ ㎡ 㗎 一 丁 七 万 丈 三 上 下 不 与 丐 丑 专 且 丕 世 丘 丙 业 丛 东 丝 丞 丟 両 丢 两 严 並 丧 丨 个 丫 中 丰 串 临 丶 丸 丹 为 主 丼 丽 举 丿 乂 乃 久 么 义 之 乌 乍 乎 乏 乐 乒 乓 乔 乖 乗 乘 乙 乜 九 乞 也 习 乡 书 乩 买 乱 乳 乾 亀 亂 了 予 争 事 二 于 亏 云 互 五 井 亘 亙 亚 些 亜 亞 亟 亡 亢 交 亥 亦 产 亨 亩 享 京 亭 亮 亲 亳 亵 人 亿 什 仁 仃 仄 仅 仆 仇 今 介 仍 从 仏 仑 仓 仔 仕 他 仗 付 仙 仝 仞 仟 代 令 以 仨 仪 们 仮 仰 仲 件 价 任 份 仿 企 伉 伊 伍 伎 伏 伐 休 伕 众 优 伙 会 伝 伞 伟 传 伢 伤 伦 伪 伫 伯 估 伴 伶 伸 伺 似 伽 佃 但 佇 佈 位 低 住 佐 佑 体 佔 何 佗 佘 余 佚 佛 作 佝 佞 佟 你 佢 佣 佤 佥 佩 佬 佯 佰 佳 併 佶 佻 佼 使 侃 侄 來 侈 例 侍 侏 侑 侖 侗 供 依 侠 価 侣 侥 侦 侧 侨 侬 侮 侯 侵 侶 侷 便 係 促 俄 俊 俎 俏 俐 俑 俗 俘 俚 保 俞 俟 俠 信 俨 俩 俪 俬 俭 修 俯 俱 俳 俸 俺 俾 倆 倉 個 倌 倍 倏 們 倒 倔 倖 倘 候 倚 倜 借 倡 値 倦 倩 倪 倫 倬 倭 倶 债 值 倾 偃 假 偈 偉 偌 偎 偏 偕 做 停 健 側 偵 偶 偷 偻 偽 偿 傀 傅 傍 傑 傘 備 傚 傢 傣 傥 储 傩 催 傭 傲 傳 債 傷 傻 傾 僅 働 像 僑 僕 僖 僚 僥 僧 僭 僮 僱 僵 價 僻 儀 儂 億 儆 儉 儋 儒 儕 儘 償 儡 優 儲 儷 儼 儿 兀 允 元 兄 充 兆 兇 先 光 克 兌 免 児 兑 兒 兔 兖 党 兜 兢 入 內 全 兩 八 公 六 兮 兰 共 兲 关 兴 兵 其 具 典 兹 养 兼 兽 冀 内 円 冇 冈 冉 冊 册 再 冏 冒 冕 冗 写 军 农 冠 冢 冤 冥 冨 冪 冬 冯 冰 冲 决 况 冶 冷 冻 冼 冽 冾 净 凄 准 凇 凈 凉 凋 凌 凍 减 凑 凛 凜 凝 几 凡 凤 処 凪 凭 凯 凰 凱 凳 凶 凸 凹 出 击 函 凿 刀 刁 刃 分 切 刈 刊 刍 刎 刑 划 列 刘 则 刚 创 初 删 判 別 刨 利 刪 别 刮 到 制 刷 券 刹 刺 刻 刽 剁 剂 剃 則 剉 削 剋 剌 前 剎 剐 剑 剔 剖 剛 剜 剝 剣 剤 剥 剧 剩 剪 副 割 創 剷 剽 剿 劃 劇 劈 劉 劊 劍 劏 劑 力 劝 办 功 加 务 劣 动 助 努 劫 劭 励 劲 劳 労 劵 効 劾 势 勁 勃 勇 勉 勋 勐 勒 動 勖 勘 務 勛 勝 勞 募 勢 勤 勧 勳 勵 勸 勺 勻 勾 勿 匀 包 匆 匈 匍 匐 匕 化 北 匙 匝 匠 匡 匣 匪 匮 匯 匱 匹 区 医 匾 匿 區 十 千 卅 升 午 卉 半 卍 华 协 卑 卒 卓 協 单 卖 南 単 博 卜 卞 卟 占 卡 卢 卤 卦 卧 卫 卮 卯 印 危 即 却 卵 卷 卸 卻 卿 厂 厄 厅 历 厉 压 厌 厕 厘 厚 厝 原 厢 厥 厦 厨 厩 厭 厮 厲 厳 
去 县 叁 参 參 又 叉 及 友 双 反 収 发 叔 取 受 变 叙 叛 叟 叠 叡 叢 口 古 句 另 叨 叩 只 叫 召 叭 叮 可 台 叱 史 右 叵 叶 号 司 叹 叻 叼 叽 吁 吃 各 吆 合 吉 吊 吋 同 名 后 吏 吐 向 吒 吓 吕 吖 吗 君 吝 吞 吟 吠 吡 否 吧 吨 吩 含 听 吭 吮 启 吱 吳 吴 吵 吶 吸 吹 吻 吼 吽 吾 呀 呂 呃 呆 呈 告 呋 呎 呐 呓 呕 呗 员 呛 呜 呢 呤 呦 周 呱 呲 味 呵 呷 呸 呻 呼 命 咀 咁 咂 咄 咆 咋 和 咎 咏 咐 咒 咔 咕 咖 咗 咘 咙 咚 咛 咣 咤 咦 咧 咨 咩 咪 咫 咬 咭 咯 咱 咲 咳 咸 咻 咽 咿 哀 品 哂 哄 哆 哇 哈 哉 哋 哌 响 哎 哏 哐 哑 哒 哔 哗 哟 員 哥 哦 哧 哨 哩 哪 哭 哮 哲 哺 哼 哽 唁 唄 唆 唇 唉 唏 唐 唑 唔 唠 唤 唧 唬 售 唯 唰 唱 唳 唷 唸 唾 啃 啄 商 啉 啊 問 啓 啕 啖 啜 啞 啟 啡 啤 啥 啦 啧 啪 啫 啬 啮 啰 啱 啲 啵 啶 啷 啸 啻 啼 啾 喀 喂 喃 善 喆 喇 喉 喊 喋 喎 喏 喔 喘 喙 喚 喜 喝 喟 喧 喪 喫 喬 單 喰 喱 喲 喳 喵 営 喷 喹 喺 喻 喽 嗅 嗆 嗇 嗎 嗑 嗒 嗓 嗔 嗖 嗚 嗜 嗝 嗟 嗡 嗣 嗤 嗦 嗨 嗪 嗬 嗯 嗰 嗲 嗳 嗶 嗷 嗽 嘀 嘅 嘆 嘈 嘉 嘌 嘍 嘎 嘔 嘖 嘗 嘘 嘚 嘛 嘜 嘞 嘟 嘢 嘣 嘤 嘧 嘩 嘭 嘮 嘯 嘰 嘱 嘲 嘴 嘶 嘸 嘹 嘻 嘿 噁 噌 噎 噓 噔 噗 噙 噜 噠 噢 噤 器 噩 噪 噬 噱 噴 噶 噸 噹 噻 噼 嚀 嚇 嚎 嚏 嚐 嚓 嚕 嚟 嚣 嚥 嚨 嚮 嚴 嚷 嚼 囂 囉 囊 囍 囑 囔 囗 囚 四 囝 回 囟 因 囡 团 団 囤 囧 囪 囫 园 困 囱 囲 図 围 囹 固 国 图 囿 圃 圄 圆 圈 國 圍 圏 園 圓 圖 團 圜 土 圣 圧 在 圩 圭 地 圳 场 圻 圾 址 坂 均 坊 坍 坎 坏 坐 坑 块 坚 坛 坝 坞 坟 坠 坡 坤 坦 坨 坪 坯 坳 坵 坷 垂 垃 垄 型 垒 垚 垛 垠 垢 垣 垦 垩 垫 垭 垮 垵 埂 埃 埋 城 埔 埕 埗 域 埠 埤 埵 執 埸 培 基 埼 堀 堂 堃 堅 堆 堇 堑 堕 堙 堡 堤 堪 堯 堰 報 場 堵 堺 堿 塊 塌 塑 塔 塗 塘 塚 塞 塢 塩 填 塬 塭 塵 塾 墀 境 墅 墉 墊 墒 墓 増 墘 墙 墜 增 墟 墨 墩 墮 墳 墻 墾 壁 壅 壆 壇 壊 壑 壓 壕 壘 壞 壟 壢 壤 壩 士 壬 壮 壯 声 売 壳 壶 壹 壺 壽 处 备 変 复 夏 夔 夕 外 夙 多 夜 够 夠 夢 夥 大 天 太 夫 夭 央 夯 失 头 夷 夸 夹 夺 夾 奂 奄 奇 奈 奉 奋 奎 奏 奐 契 奔 奕 奖 套 奘 奚 奠 奢 奥 奧 奪 奬 奮 女 奴 奶 奸 她 好 如 妃 妄 妆 妇 妈 妊 妍 妒 妓 妖 妘 妙 妝 妞 妣 妤 妥 妨 妩 妪 妮 妲 妳 妹 妻 妾 姆 姉 姊 始 姍 姐 姑 姒 姓 委 姗 姚 姜 姝 姣 姥 姦 姨 姪 姫 姬 姹 姻 姿 威 娃 娄 娅 娆 娇 娉 娑 娓 娘 娛 娜 娟 娠 娣 娥 娩 娱 娲 娴 娶 娼 婀 婁 婆 婉 婊 婕 婚 婢 婦 婧 婪 婭 婴 婵 婶 婷 婺 婿 媒 媚 媛 媞 媧 媲 媳 媽 媾 嫁 嫂 嫉 嫌 嫑 嫔 嫖 嫘 嫚 嫡 嫣 嫦 嫩 嫲 嫵 嫻 嬅 嬉 嬌 嬗 嬛 嬢 嬤 嬪 嬰 嬴 嬷 嬸 嬿 孀 孃 子 孑 孔 孕 孖 字 存 孙 孚 孛 孜 孝 孟 孢 季 孤 学 孩 孪 孫 孬 孰 孱 孳 孵 學 孺 孽 孿 宁 它 宅 宇 守 安 宋 完 宏 宓 宕 宗 官 宙 定 宛 宜 宝 实 実 宠 审 客 宣 室 宥 宦 宪 宫 宮 宰 害 宴 宵 家 宸 容 宽 宾 宿 寂 寄 寅 密 寇 富 寐 寒 寓 寛 寝 寞 察 寡 寢 寥 實 寧 寨 審 寫 寬 寮 寰 寵 寶 寸 对 寺 寻 导 対 寿 封 専 射 将 將 專 尉 尊 尋 對 導 小 少 尔 尕 尖 尘 尚 尝 尤 尧 尬 就 尴 尷 尸 尹 尺 尻 尼 尽 尾 尿 局 屁 层 屄 居 屆 屈 屉 届 屋 屌 屍 屎 屏 屐 屑 展 屜 属 屠 屡 屢 層 履 屬 屯 山 屹 屿 岀 岁 岂 岌 岐 岑 岔 岖 岗 岘 岙 岚 岛 岡 岩 岫 岬 岭 岱 岳 岷 岸 峇 峋 峒 峙 峡 峤 峥 峦 峨 峪 峭 峯 峰 峴 島 峻 峽 崁 崂 崆 崇 崎 崑 崔 崖 崗 崙 崛 崧 崩 崭 崴 崽 嵇 嵊 嵋 嵌 嵐 嵘 嵩 嵬 嵯 嶂 嶄 嶇 嶋 嶙 嶺 嶼 嶽 巅 巍 巒 巔 巖 川 州 巡 巢 工 左 巧 巨 
巩 巫 差 己 已 巳 巴 巷 巻 巽 巾 巿 币 市 布 帅 帆 师 希 帐 帑 帕 帖 帘 帚 帛 帜 帝 帥 带 帧 師 席 帮 帯 帰 帳 帶 帷 常 帼 帽 幀 幂 幄 幅 幌 幔 幕 幟 幡 幢 幣 幫 干 平 年 并 幸 幹 幺 幻 幼 幽 幾 广 庁 広 庄 庆 庇 床 序 庐 库 应 底 庖 店 庙 庚 府 庞 废 庠 度 座 庫 庭 庵 庶 康 庸 庹 庾 廁 廂 廃 廈 廉 廊 廓 廖 廚 廝 廟 廠 廢 廣 廬 廳 延 廷 建 廿 开 弁 异 弃 弄 弈 弊 弋 式 弑 弒 弓 弔 引 弗 弘 弛 弟 张 弥 弦 弧 弩 弭 弯 弱 張 強 弹 强 弼 弾 彅 彆 彈 彌 彎 归 当 录 彗 彙 彝 形 彤 彥 彦 彧 彩 彪 彫 彬 彭 彰 影 彷 役 彻 彼 彿 往 征 径 待 徇 很 徉 徊 律 後 徐 徑 徒 従 徕 得 徘 徙 徜 從 徠 御 徨 復 循 徬 微 徳 徴 徵 德 徹 徼 徽 心 必 忆 忌 忍 忏 忐 忑 忒 忖 志 忘 忙 応 忠 忡 忤 忧 忪 快 忱 念 忻 忽 忿 怀 态 怂 怅 怆 怎 怏 怒 怔 怕 怖 怙 怜 思 怠 怡 急 怦 性 怨 怪 怯 怵 总 怼 恁 恃 恆 恋 恍 恐 恒 恕 恙 恚 恢 恣 恤 恥 恨 恩 恪 恫 恬 恭 息 恰 恳 恵 恶 恸 恺 恻 恼 恿 悄 悅 悉 悌 悍 悔 悖 悚 悟 悠 患 悦 您 悩 悪 悬 悯 悱 悲 悴 悵 悶 悸 悻 悼 悽 情 惆 惇 惊 惋 惑 惕 惘 惚 惜 惟 惠 惡 惦 惧 惨 惩 惫 惬 惭 惮 惯 惰 惱 想 惴 惶 惹 惺 愁 愆 愈 愉 愍 意 愕 愚 愛 愜 感 愣 愤 愧 愫 愷 愿 慄 慈 態 慌 慎 慑 慕 慘 慚 慟 慢 慣 慧 慨 慫 慮 慰 慳 慵 慶 慷 慾 憂 憊 憋 憎 憐 憑 憔 憚 憤 憧 憨 憩 憫 憬 憲 憶 憾 懂 懇 懈 應 懊 懋 懑 懒 懦 懲 懵 懶 懷 懸 懺 懼 懾 懿 戀 戈 戊 戌 戍 戎 戏 成 我 戒 戕 或 战 戚 戛 戟 戡 戦 截 戬 戮 戰 戲 戳 戴 戶 户 戸 戻 戾 房 所 扁 扇 扈 扉 手 才 扎 扑 扒 打 扔 払 托 扛 扣 扦 执 扩 扪 扫 扬 扭 扮 扯 扰 扱 扳 扶 批 扼 找 承 技 抄 抉 把 抑 抒 抓 投 抖 抗 折 抚 抛 抜 択 抟 抠 抡 抢 护 报 抨 披 抬 抱 抵 抹 押 抽 抿 拂 拄 担 拆 拇 拈 拉 拋 拌 拍 拎 拐 拒 拓 拔 拖 拗 拘 拙 拚 招 拜 拟 拡 拢 拣 拥 拦 拧 拨 择 括 拭 拮 拯 拱 拳 拴 拷 拼 拽 拾 拿 持 挂 指 挈 按 挎 挑 挖 挙 挚 挛 挝 挞 挟 挠 挡 挣 挤 挥 挨 挪 挫 振 挲 挹 挺 挽 挾 捂 捅 捆 捉 捋 捌 捍 捎 捏 捐 捕 捞 损 捡 换 捣 捧 捨 捩 据 捱 捲 捶 捷 捺 捻 掀 掂 掃 掇 授 掉 掌 掏 掐 排 掖 掘 掙 掛 掠 採 探 掣 接 控 推 掩 措 掬 掰 掲 掳 掴 掷 掸 掺 揀 揃 揄 揆 揉 揍 描 提 插 揖 揚 換 握 揣 揩 揪 揭 揮 援 揶 揸 揹 揽 搀 搁 搂 搅 損 搏 搐 搓 搔 搖 搗 搜 搞 搡 搪 搬 搭 搵 搶 携 搽 摀 摁 摄 摆 摇 摈 摊 摒 摔 摘 摞 摟 摧 摩 摯 摳 摸 摹 摺 摻 撂 撃 撅 撇 撈 撐 撑 撒 撓 撕 撚 撞 撤 撥 撩 撫 撬 播 撮 撰 撲 撵 撷 撸 撻 撼 撿 擀 擁 擂 擄 擅 擇 擊 擋 操 擎 擒 擔 擘 據 擞 擠 擡 擢 擦 擬 擰 擱 擲 擴 擷 擺 擼 擾 攀 攏 攒 攔 攘 攙 攜 攝 攞 攢 攣 攤 攥 攪 攫 攬 支 收 攸 改 攻 放 政 故 效 敌 敍 敎 敏 救 敕 敖 敗 敘 教 敛 敝 敞 敢 散 敦 敬 数 敲 整 敵 敷 數 斂 斃 文 斋 斌 斎 斐 斑 斓 斗 料 斛 斜 斟 斡 斤 斥 斧 斩 斫 斬 断 斯 新 斷 方 於 施 旁 旃 旅 旋 旌 旎 族 旖 旗 无 既 日 旦 旧 旨 早 旬 旭 旮 旱 时 旷 旺 旻 昀 昂 昆 昇 昉 昊 昌 明 昏 易 昔 昕 昙 星 映 春 昧 昨 昭 是 昱 昴 昵 昶 昼 显 晁 時 晃 晉 晋 晌 晏 晒 晓 晔 晕 晖 晗 晚 晝 晞 晟 晤 晦 晨 晩 普 景 晰 晴 晶 晷 智 晾 暂 暄 暇 暈 暉 暌 暐 暑 暖 暗 暝 暢 暧 暨 暫 暮 暱 暴 暸 暹 曄 曆 曇 曉 曖 曙 曜 曝 曠 曦 曬 曰 曲 曳 更 書 曹 曼 曾 替 最 會 月 有 朋 服 朐 朔 朕 朗 望 朝 期 朦 朧 木 未 末 本 札 朮 术 朱 朴 朵 机 朽 杀 杂 权 杆 杈 杉 李 杏 材 村 杓 杖 杜 杞 束 杠 条 来 杨 
杭 杯 杰 東 杳 杵 杷 杼 松 板 极 构 枇 枉 枋 析 枕 林 枚 果 枝 枢 枣 枪 枫 枭 枯 枰 枱 枳 架 枷 枸 柄 柏 某 柑 柒 染 柔 柘 柚 柜 柞 柠 柢 查 柩 柬 柯 柱 柳 柴 柵 査 柿 栀 栃 栄 栅 标 栈 栉 栋 栎 栏 树 栓 栖 栗 校 栩 株 样 核 根 格 栽 栾 桀 桁 桂 桃 桅 框 案 桉 桌 桎 桐 桑 桓 桔 桜 桠 桡 桢 档 桥 桦 桧 桨 桩 桶 桿 梁 梅 梆 梏 梓 梗 條 梟 梢 梦 梧 梨 梭 梯 械 梳 梵 梶 检 棂 棄 棉 棋 棍 棒 棕 棗 棘 棚 棟 棠 棣 棧 森 棱 棲 棵 棹 棺 椁 椅 椋 植 椎 椒 検 椪 椭 椰 椹 椽 椿 楂 楊 楓 楔 楚 楝 楞 楠 楣 楨 楫 業 楮 極 楷 楸 楹 楼 楽 概 榄 榆 榈 榉 榔 榕 榖 榛 榜 榨 榫 榭 榮 榱 榴 榷 榻 槁 槃 構 槌 槍 槎 槐 槓 様 槛 槟 槤 槭 槲 槳 槻 槽 槿 樁 樂 樊 樑 樓 標 樞 樟 模 樣 権 横 樫 樯 樱 樵 樸 樹 樺 樽 樾 橄 橇 橋 橐 橘 橙 機 橡 橢 橫 橱 橹 橼 檀 檄 檎 檐 檔 檗 檜 檢 檬 檯 檳 檸 檻 櫃 櫚 櫛 櫥 櫸 櫻 欄 權 欒 欖 欠 次 欢 欣 欧 欲 欸 欺 欽 款 歆 歇 歉 歌 歎 歐 歓 歙 歛 歡 止 正 此 步 武 歧 歩 歪 歯 歲 歳 歴 歷 歸 歹 死 歼 殁 殃 殆 殇 殉 殊 残 殒 殓 殖 殘 殞 殡 殤 殭 殯 殲 殴 段 殷 殺 殼 殿 毀 毁 毂 毅 毆 毋 母 毎 每 毒 毓 比 毕 毗 毘 毙 毛 毡 毫 毯 毽 氈 氏 氐 民 氓 气 氖 気 氙 氛 氟 氡 氢 氣 氤 氦 氧 氨 氪 氫 氮 氯 氰 氲 水 氷 永 氹 氾 汀 汁 求 汆 汇 汉 汎 汐 汕 汗 汙 汛 汝 汞 江 池 污 汤 汨 汩 汪 汰 汲 汴 汶 汹 決 汽 汾 沁 沂 沃 沅 沈 沉 沌 沏 沐 沒 沓 沖 沙 沛 沟 没 沢 沣 沥 沦 沧 沪 沫 沭 沮 沱 河 沸 油 治 沼 沽 沾 沿 況 泄 泉 泊 泌 泓 法 泗 泛 泞 泠 泡 波 泣 泥 注 泪 泫 泮 泯 泰 泱 泳 泵 泷 泸 泻 泼 泽 泾 洁 洄 洋 洒 洗 洙 洛 洞 津 洩 洪 洮 洱 洲 洵 洶 洸 洹 活 洼 洽 派 流 浃 浄 浅 浆 浇 浊 测 济 浏 浑 浒 浓 浔 浙 浚 浜 浣 浦 浩 浪 浬 浮 浯 浴 海 浸 涂 涅 涇 消 涉 涌 涎 涓 涔 涕 涙 涛 涝 涞 涟 涠 涡 涣 涤 润 涧 涨 涩 涪 涮 涯 液 涵 涸 涼 涿 淀 淄 淅 淆 淇 淋 淌 淑 淒 淖 淘 淙 淚 淞 淡 淤 淦 淨 淩 淪 淫 淬 淮 深 淳 淵 混 淹 淺 添 淼 清 済 渉 渊 渋 渍 渎 渐 渔 渗 渙 渚 減 渝 渠 渡 渣 渤 渥 渦 温 測 渭 港 渲 渴 游 渺 渾 湃 湄 湊 湍 湖 湘 湛 湟 湧 湫 湮 湯 湳 湾 湿 満 溃 溅 溉 溏 源 準 溜 溝 溟 溢 溥 溧 溪 溫 溯 溱 溴 溶 溺 溼 滁 滂 滄 滅 滇 滋 滌 滑 滓 滔 滕 滙 滚 滝 滞 滟 满 滢 滤 滥 滦 滨 滩 滬 滯 滲 滴 滷 滸 滾 滿 漁 漂 漆 漉 漏 漓 演 漕 漠 漢 漣 漩 漪 漫 漬 漯 漱 漲 漳 漸 漾 漿 潆 潇 潋 潍 潑 潔 潘 潛 潜 潞 潟 潢 潤 潦 潧 潭 潮 潰 潴 潸 潺 潼 澀 澄 澆 澈 澍 澎 澗 澜 澡 澤 澧 澱 澳 澹 激 濁 濂 濃 濑 濒 濕 濘 濛 濟 濠 濡 濤 濫 濬 濮 濯 濱 濺 濾 瀅 瀆 瀉 瀋 瀏 瀑 瀕 瀘 瀚 瀛 瀝 瀞 瀟 瀧 瀨 瀬 瀰 瀾 灌 灏 灑 灘 灝 灞 灣 火 灬 灭 灯 灰 灵 灶 灸 灼 災 灾 灿 炀 炁 炅 炉 炊 炎 炒 炔 炕 炖 炙 炜 炫 炬 炭 炮 炯 炳 炷 炸 点 為 炼 炽 烁 烂 烃 烈 烊 烏 烘 烙 烛 烟 烤 烦 烧 烨 烩 烫 烬 热 烯 烷 烹 烽 焉 焊 焕 焖 焗 焘 焙 焚 焜 無 焦 焯 焰 焱 然 焼 煅 煉 煊 煌 煎 煒 煖 煙 煜 煞 煤 煥 煦 照 煨 煩 煮 煲 煸 煽 熄 熊 熏 熒 熔 熙 熟 熠 熨 熬 熱 熵 熹 熾 燁 燃 燄 燈 燉 燊 燎 燒 燔 燕 燙 燜 營 燥 燦 燧 燭 燮 燴 燻 燼 燿 爆 爍 爐 爛 爪 爬 爭 爰 爱 爲 爵 父 爷 爸 爹 爺 爻 爽 爾 牆 片 版 牌 牍 牒 牙 牛 牝 牟 牠 牡 牢 牦 牧 物 牯 牲 牴 牵 特 牺 牽 犀 犁 犄 犊 犍 犒 犢 犧 犬 犯 状 犷 犸 犹 狀 狂 狄 狈 狎 狐 狒 狗 狙 狞 狠 狡 狩 独 狭 狮 狰 狱 狸 狹 狼 狽 猎 猕 猖 猗 猙 猛 猜 猝 猥 猩 
猪 猫 猬 献 猴 猶 猷 猾 猿 獄 獅 獎 獐 獒 獗 獠 獣 獨 獭 獰 獲 獵 獷 獸 獺 獻 獼 獾 玄 率 玉 王 玑 玖 玛 玟 玠 玥 玩 玫 玮 环 现 玲 玳 玷 玺 玻 珀 珂 珅 珈 珉 珊 珍 珏 珐 珑 珙 珞 珠 珣 珥 珩 珪 班 珮 珲 珺 現 球 琅 理 琇 琉 琊 琍 琏 琐 琛 琢 琥 琦 琨 琪 琬 琮 琰 琲 琳 琴 琵 琶 琺 琼 瑀 瑁 瑄 瑋 瑕 瑗 瑙 瑚 瑛 瑜 瑞 瑟 瑠 瑣 瑤 瑩 瑪 瑯 瑰 瑶 瑾 璀 璁 璃 璇 璉 璋 璎 璐 璜 璞 璟 璧 璨 環 璽 璿 瓊 瓏 瓒 瓜 瓢 瓣 瓤 瓦 瓮 瓯 瓴 瓶 瓷 甄 甌 甕 甘 甙 甚 甜 生 產 産 甥 甦 用 甩 甫 甬 甭 甯 田 由 甲 申 电 男 甸 町 画 甾 畀 畅 界 畏 畑 畔 留 畜 畝 畢 略 畦 番 畫 異 畲 畳 畴 當 畸 畹 畿 疆 疇 疊 疏 疑 疔 疖 疗 疙 疚 疝 疟 疡 疣 疤 疥 疫 疮 疯 疱 疲 疳 疵 疸 疹 疼 疽 疾 痂 病 症 痈 痉 痊 痍 痒 痔 痕 痘 痙 痛 痞 痠 痢 痣 痤 痧 痨 痪 痫 痰 痱 痴 痹 痺 痼 痿 瘀 瘁 瘋 瘍 瘓 瘘 瘙 瘟 瘠 瘡 瘢 瘤 瘦 瘧 瘩 瘪 瘫 瘴 瘸 瘾 療 癇 癌 癒 癖 癜 癞 癡 癢 癣 癥 癫 癬 癮 癱 癲 癸 発 登 發 白 百 皂 的 皆 皇 皈 皋 皎 皑 皓 皖 皙 皚 皮 皰 皱 皴 皺 皿 盂 盃 盅 盆 盈 益 盎 盏 盐 监 盒 盔 盖 盗 盘 盛 盜 盞 盟 盡 監 盤 盥 盧 盪 目 盯 盱 盲 直 相 盹 盼 盾 省 眈 眉 看 県 眙 眞 真 眠 眦 眨 眩 眯 眶 眷 眸 眺 眼 眾 着 睁 睇 睏 睐 睑 睛 睜 睞 睡 睢 督 睥 睦 睨 睪 睫 睬 睹 睽 睾 睿 瞄 瞅 瞇 瞋 瞌 瞎 瞑 瞒 瞓 瞞 瞟 瞠 瞥 瞧 瞩 瞪 瞬 瞭 瞰 瞳 瞻 瞼 瞿 矇 矍 矗 矚 矛 矜 矢 矣 知 矩 矫 短 矮 矯 石 矶 矽 矾 矿 码 砂 砌 砍 砒 研 砖 砗 砚 砝 砣 砥 砧 砭 砰 砲 破 砷 砸 砺 砼 砾 础 硅 硐 硒 硕 硝 硫 硬 确 硯 硼 碁 碇 碉 碌 碍 碎 碑 碓 碗 碘 碚 碛 碟 碣 碧 碩 碰 碱 碳 碴 確 碼 碾 磁 磅 磊 磋 磐 磕 磚 磡 磨 磬 磯 磲 磷 磺 礁 礎 礙 礡 礦 礪 礫 礴 示 礼 社 祀 祁 祂 祇 祈 祉 祎 祐 祕 祖 祗 祚 祛 祜 祝 神 祟 祠 祢 祥 票 祭 祯 祷 祸 祺 祿 禀 禁 禄 禅 禍 禎 福 禛 禦 禧 禪 禮 禱 禹 禺 离 禽 禾 禿 秀 私 秃 秆 秉 秋 种 科 秒 秘 租 秣 秤 秦 秧 秩 秭 积 称 秸 移 秽 稀 稅 程 稍 税 稔 稗 稚 稜 稞 稟 稠 稣 種 稱 稲 稳 稷 稹 稻 稼 稽 稿 穀 穂 穆 穌 積 穎 穗 穢 穩 穫 穴 究 穷 穹 空 穿 突 窃 窄 窈 窍 窑 窒 窓 窕 窖 窗 窘 窜 窝 窟 窠 窥 窦 窨 窩 窪 窮 窯 窺 窿 竄 竅 竇 竊 立 竖 站 竜 竞 竟 章 竣 童 竭 端 競 竹 竺 竽 竿 笃 笆 笈 笋 笏 笑 笔 笙 笛 笞 笠 符 笨 第 笹 笺 笼 筆 等 筊 筋 筍 筏 筐 筑 筒 答 策 筛 筝 筠 筱 筲 筵 筷 筹 签 简 箇 箋 箍 箏 箐 箔 箕 算 箝 管 箩 箫 箭 箱 箴 箸 節 篁 範 篆 篇 築 篑 篓 篙 篝 篠 篡 篤 篩 篪 篮 篱 篷 簇 簌 簍 簡 簦 簧 簪 簫 簷 簸 簽 簾 簿 籁 籃 籌 籍 籐 籟 籠 籤 籬 籮 籲 米 类 籼 籽 粄 粉 粑 粒 粕 粗 粘 粟 粤 粥 粧 粪 粮 粱 粲 粳 粵 粹 粼 粽 精 粿 糅 糊 糍 糕 糖 糗 糙 糜 糞 糟 糠 糧 糬 糯 糰 糸 系 糾 紀 紂 約 紅 紉 紊 紋 納 紐 紓 純 紗 紘 紙 級 紛 紜 素 紡 索 紧 紫 紮 累 細 紳 紹 紺 終 絃 組 絆 経 結 絕 絞 絡 絢 給 絨 絮 統 絲 絳 絵 絶 絹 綁 綏 綑 經 継 続 綜 綠 綢 綦 綫 綬 維 綱 網 綴 綵 綸 綺 綻 綽 綾 綿 緊 緋 総 緑 緒 緘 線 緝 緞 締 緣 編 緩 緬 緯 練 緹 緻 縁 縄 縈 縛 縝 縣 縫 縮 縱 縴 縷 總 績 繁 繃 繆 繇 繋 織 繕 繚 繞 繡 繩 繪 繫 繭 繳 繹 繼 繽 纂 續 纍 纏 纓 纔 纖 纜 纠 红 纣 纤 约 级 纨 纪 纫 纬 纭 纯 纰 纱 纲 纳 纵 纶 纷 纸 纹 纺 纽 纾 线 绀 练 组 绅 细 织 终 绊 绍 绎 经 绑 绒 结 绔 绕 绘 给 绚 绛 络 绝 绞 统 绡 绢 绣 绥 绦 继 绩 绪 绫 续 绮 绯 绰 绳 维 绵 绶 绷 绸 绻 综 绽 
绾 绿 缀 缄 缅 缆 缇 缈 缉 缎 缓 缔 缕 编 缘 缙 缚 缜 缝 缠 缢 缤 缥 缨 缩 缪 缭 缮 缰 缱 缴 缸 缺 缽 罂 罄 罌 罐 网 罔 罕 罗 罚 罡 罢 罩 罪 置 罰 署 罵 罷 罹 羁 羅 羈 羊 羌 美 羔 羚 羞 羟 羡 羣 群 羥 羧 羨 義 羯 羲 羸 羹 羽 羿 翁 翅 翊 翌 翎 習 翔 翘 翟 翠 翡 翦 翩 翰 翱 翳 翹 翻 翼 耀 老 考 耄 者 耆 耋 而 耍 耐 耒 耕 耗 耘 耙 耦 耨 耳 耶 耷 耸 耻 耽 耿 聂 聆 聊 聋 职 聒 联 聖 聘 聚 聞 聪 聯 聰 聲 聳 聴 聶 職 聽 聾 聿 肃 肄 肅 肆 肇 肉 肋 肌 肏 肓 肖 肘 肚 肛 肝 肠 股 肢 肤 肥 肩 肪 肮 肯 肱 育 肴 肺 肽 肾 肿 胀 胁 胃 胄 胆 背 胍 胎 胖 胚 胛 胜 胝 胞 胡 胤 胥 胧 胫 胭 胯 胰 胱 胳 胴 胶 胸 胺 能 脂 脅 脆 脇 脈 脉 脊 脍 脏 脐 脑 脓 脖 脘 脚 脛 脣 脩 脫 脯 脱 脲 脳 脸 脹 脾 腆 腈 腊 腋 腌 腎 腐 腑 腓 腔 腕 腥 腦 腩 腫 腭 腮 腰 腱 腳 腴 腸 腹 腺 腻 腼 腾 腿 膀 膈 膊 膏 膑 膘 膚 膛 膜 膝 膠 膦 膨 膩 膳 膺 膻 膽 膾 膿 臀 臂 臃 臆 臉 臊 臍 臓 臘 臟 臣 臥 臧 臨 自 臬 臭 至 致 臺 臻 臼 臾 舀 舂 舅 舆 與 興 舉 舊 舌 舍 舎 舐 舒 舔 舖 舗 舛 舜 舞 舟 航 舫 般 舰 舱 舵 舶 舷 舸 船 舺 舾 艇 艋 艘 艙 艦 艮 良 艰 艱 色 艳 艷 艹 艺 艾 节 芃 芈 芊 芋 芍 芎 芒 芙 芜 芝 芡 芥 芦 芩 芪 芫 芬 芭 芮 芯 花 芳 芷 芸 芹 芻 芽 芾 苁 苄 苇 苋 苍 苏 苑 苒 苓 苔 苕 苗 苛 苜 苞 苟 苡 苣 若 苦 苫 苯 英 苷 苹 苻 茁 茂 范 茄 茅 茉 茎 茏 茗 茜 茧 茨 茫 茬 茭 茯 茱 茲 茴 茵 茶 茸 茹 茼 荀 荃 荆 草 荊 荏 荐 荒 荔 荖 荘 荚 荞 荟 荠 荡 荣 荤 荥 荧 荨 荪 荫 药 荳 荷 荸 荻 荼 荽 莅 莆 莉 莊 莎 莒 莓 莖 莘 莞 莠 莢 莧 莪 莫 莱 莲 莴 获 莹 莺 莽 莿 菀 菁 菅 菇 菈 菊 菌 菏 菓 菖 菘 菜 菟 菠 菡 菩 華 菱 菲 菸 菽 萁 萃 萄 萊 萋 萌 萍 萎 萘 萝 萤 营 萦 萧 萨 萩 萬 萱 萵 萸 萼 落 葆 葉 著 葚 葛 葡 董 葦 葩 葫 葬 葭 葯 葱 葳 葵 葷 葺 蒂 蒋 蒐 蒔 蒙 蒜 蒞 蒟 蒡 蒨 蒲 蒸 蒹 蒻 蒼 蒿 蓁 蓄 蓆 蓉 蓋 蓑 蓓 蓖 蓝 蓟 蓦 蓬 蓮 蓼 蓿 蔑 蔓 蔔 蔗 蔘 蔚 蔡 蔣 蔥 蔫 蔬 蔭 蔵 蔷 蔺 蔻 蔼 蔽 蕁 蕃 蕈 蕉 蕊 蕎 蕙 蕤 蕨 蕩 蕪 蕭 蕲 蕴 蕻 蕾 薄 薅 薇 薈 薊 薏 薑 薔 薙 薛 薦 薨 薩 薪 薬 薯 薰 薹 藉 藍 藏 藐 藓 藕 藜 藝 藤 藥 藩 藹 藻 藿 蘆 蘇 蘊 蘋 蘑 蘚 蘭 蘸 蘼 蘿 虎 虏 虐 虑 虔 處 虚 虛 虜 虞 號 虢 虧 虫 虬 虱 虹 虻 虽 虾 蚀 蚁 蚂 蚊 蚌 蚓 蚕 蚜 蚝 蚣 蚤 蚩 蚪 蚯 蚱 蚵 蛀 蛆 蛇 蛊 蛋 蛎 蛐 蛔 蛙 蛛 蛟 蛤 蛭 蛮 蛰 蛳 蛹 蛻 蛾 蜀 蜂 蜃 蜆 蜇 蜈 蜊 蜍 蜒 蜓 蜕 蜗 蜘 蜚 蜜 蜡 蜢 蜥 蜱 蜴 蜷 蜻 蜿 蝇 蝈 蝉 蝌 蝎 蝕 蝗 蝙 蝟 蝠 蝦 蝨 蝴 蝶 蝸 蝼 螂 螃 融 螞 螢 螨 螯 螳 螺 蟀 蟄 蟆 蟋 蟎 蟑 蟒 蟠 蟬 蟲 蟹 蟻 蟾 蠅 蠍 蠔 蠕 蠛 蠟 蠡 蠢 蠣 蠱 蠶 蠹 蠻 血 衄 衅 衆 行 衍 術 衔 街 衙 衛 衝 衞 衡 衢 衣 补 表 衩 衫 衬 衮 衰 衲 衷 衹 衾 衿 袁 袂 袄 袅 袈 袋 袍 袒 袖 袜 袞 袤 袪 被 袭 袱 裁 裂 装 裆 裊 裏 裔 裕 裘 裙 補 裝 裟 裡 裤 裨 裱 裳 裴 裸 裹 製 裾 褂 複 褐 褒 褓 褔 褚 褥 褪 褫 褲 褶 褻 襁 襄 襟 襠 襪 襬 襯 襲 西 要 覃 覆 覇 見 規 覓 視 覚 覦 覧 親 覬 観 覷 覺 覽 觀 见 观 规 觅 视 览 觉 觊 觎 觐 觑 角 觞 解 觥 触 觸 言 訂 計 訊 討 訓 訕 訖 託 記 訛 訝 訟 訣 訥 訪 設 許 訳 訴 訶 診 註 証 詆 詐 詔 評 詛 詞 詠 詡 詢 詣 試 詩 詫 詬 詭 詮 詰 話 該 詳 詹 詼 誅 誇 誉 誌 認 誓 誕 誘 語 誠 誡 誣 誤 誥 誦 誨 說 説 読 誰 課 誹 誼 調 諄 談 請 諏 諒 論 諗 諜 諡 諦 諧 諫 諭 諮 諱 諳 諷 諸 諺 諾 謀 謁 謂 謄 謊 謎 謐 謔 謗 謙 講 謝 
謠 謨 謬 謹 謾 譁 證 譎 譏 識 譙 譚 譜 警 譬 譯 議 譲 譴 護 譽 讀 變 讓 讚 讞 计 订 认 讥 讧 讨 让 讪 讫 训 议 讯 记 讲 讳 讴 讶 讷 许 讹 论 讼 讽 设 访 诀 证 诃 评 诅 识 诈 诉 诊 诋 词 诏 译 试 诗 诘 诙 诚 诛 话 诞 诟 诠 诡 询 诣 诤 该 详 诧 诩 诫 诬 语 误 诰 诱 诲 说 诵 诶 请 诸 诺 读 诽 课 诿 谀 谁 调 谄 谅 谆 谈 谊 谋 谌 谍 谎 谏 谐 谑 谒 谓 谔 谕 谗 谘 谙 谚 谛 谜 谟 谢 谣 谤 谥 谦 谧 谨 谩 谪 谬 谭 谯 谱 谲 谴 谶 谷 豁 豆 豇 豈 豉 豊 豌 豎 豐 豔 豚 象 豢 豪 豫 豬 豹 豺 貂 貅 貌 貓 貔 貘 貝 貞 負 財 貢 貧 貨 販 貪 貫 責 貯 貰 貳 貴 貶 買 貸 費 貼 貽 貿 賀 賁 賂 賃 賄 資 賈 賊 賑 賓 賜 賞 賠 賡 賢 賣 賤 賦 質 賬 賭 賴 賺 購 賽 贅 贈 贊 贍 贏 贓 贖 贛 贝 贞 负 贡 财 责 贤 败 账 货 质 贩 贪 贫 贬 购 贮 贯 贰 贱 贲 贴 贵 贷 贸 费 贺 贻 贼 贾 贿 赁 赂 赃 资 赅 赈 赊 赋 赌 赎 赏 赐 赓 赔 赖 赘 赚 赛 赝 赞 赠 赡 赢 赣 赤 赦 赧 赫 赭 走 赳 赴 赵 赶 起 趁 超 越 趋 趕 趙 趟 趣 趨 足 趴 趵 趸 趺 趾 跃 跄 跆 跋 跌 跎 跑 跖 跚 跛 距 跟 跡 跤 跨 跩 跪 路 跳 践 跷 跹 跺 跻 踉 踊 踌 踏 踐 踝 踞 踟 踢 踩 踪 踮 踱 踴 踵 踹 蹂 蹄 蹇 蹈 蹉 蹊 蹋 蹑 蹒 蹙 蹟 蹣 蹤 蹦 蹩 蹬 蹭 蹲 蹴 蹶 蹺 蹼 蹿 躁 躇 躉 躊 躋 躍 躏 躪 身 躬 躯 躲 躺 軀 車 軋 軌 軍 軒 軟 転 軸 軼 軽 軾 較 載 輒 輓 輔 輕 輛 輝 輟 輩 輪 輯 輸 輻 輾 輿 轄 轅 轆 轉 轍 轎 轟 车 轧 轨 轩 转 轭 轮 软 轰 轲 轴 轶 轻 轼 载 轿 较 辄 辅 辆 辇 辈 辉 辊 辍 辐 辑 输 辕 辖 辗 辘 辙 辛 辜 辞 辟 辣 辦 辨 辩 辫 辭 辮 辯 辰 辱 農 边 辺 辻 込 辽 达 迁 迂 迄 迅 过 迈 迎 运 近 返 还 这 进 远 违 连 迟 迢 迤 迥 迦 迩 迪 迫 迭 述 迴 迷 迸 迹 迺 追 退 送 适 逃 逅 逆 选 逊 逍 透 逐 递 途 逕 逗 這 通 逛 逝 逞 速 造 逢 連 逮 週 進 逵 逶 逸 逻 逼 逾 遁 遂 遅 遇 遊 運 遍 過 遏 遐 遑 遒 道 達 違 遗 遙 遛 遜 遞 遠 遢 遣 遥 遨 適 遭 遮 遲 遴 遵 遶 遷 選 遺 遼 遽 避 邀 邁 邂 邃 還 邇 邈 邊 邋 邏 邑 邓 邕 邛 邝 邢 那 邦 邨 邪 邬 邮 邯 邰 邱 邳 邵 邸 邹 邺 邻 郁 郅 郊 郎 郑 郜 郝 郡 郢 郤 郦 郧 部 郫 郭 郴 郵 郷 郸 都 鄂 鄉 鄒 鄔 鄙 鄞 鄢 鄧 鄭 鄰 鄱 鄲 鄺 酉 酊 酋 酌 配 酐 酒 酗 酚 酝 酢 酣 酥 酩 酪 酬 酮 酯 酰 酱 酵 酶 酷 酸 酿 醃 醇 醉 醋 醍 醐 醒 醚 醛 醜 醞 醣 醪 醫 醬 醮 醯 醴 醺 釀 釁 采 釉 释 釋 里 重 野 量 釐 金 釗 釘 釜 針 釣 釦 釧 釵 鈀 鈉 鈍 鈎 鈔 鈕 鈞 鈣 鈦 鈪 鈴 鈺 鈾 鉀 鉄 鉅 鉉 鉑 鉗 鉚 鉛 鉤 鉴 鉻 銀 銃 銅 銑 銓 銖 銘 銜 銬 銭 銮 銳 銷 銹 鋁 鋅 鋒 鋤 鋪 鋰 鋸 鋼 錄 錐 錘 錚 錠 錢 錦 錨 錫 錮 錯 録 錳 錶 鍊 鍋 鍍 鍛 鍥 鍰 鍵 鍺 鍾 鎂 鎊 鎌 鎏 鎔 鎖 鎗 鎚 鎧 鎬 鎮 鎳 鏈 鏖 鏗 鏘 鏞 鏟 鏡 鏢 鏤 鏽 鐘 鐮 鐲 鐳 鐵 鐸 鐺 鑄 鑊 鑑 鑒 鑣 鑫 鑰 鑲 鑼 鑽 鑾 鑿 针 钉 钊 钎 钏 钒 钓 钗 钙 钛 钜 钝 钞 钟 钠 钡 钢 钣 钤 钥 钦 钧 钨 钩 钮 钯 钰 钱 钳 钴 钵 钺 钻 钼 钾 钿 铀 铁 铂 铃 铄 铅 铆 铉 铎 铐 铛 铜 铝 铠 铡 铢 铣 铤 铨 铩 铬 铭 铮 铰 铲 铵 银 铸 铺 链 铿 销 锁 锂 锄 锅 锆 锈 锉 锋 锌 锏 锐 锑 错 锚 锟 锡 锢 锣 锤 锥 锦 锭 键 锯 锰 锲 锵 锹 锺 锻 镀 镁 镂 镇 镉 镌 镍 镐 镑 镕 镖 镗 镛 镜 镣 镭 镯 镰 镳 镶 長 长 門 閃 閉 開 閎 閏 閑 閒 間 閔 閘 閡 関 閣 閥 閨 閩 閱 閲 閹 閻 閾 闆 闇 闊 闌 闍 闔 闕 闖 闘 關 闡 闢 门 闪 闫 闭 问 闯 闰 闲 间 闵 闷 闸 闹 闺 闻 闽 闾 阀 阁 阂 阅 阆 阇 阈 阉 阎 阐 阑 阔 阕 阖 阙 阚 阜 队 阡 阪 阮 
阱 防 阳 阴 阵 阶 阻 阿 陀 陂 附 际 陆 陇 陈 陋 陌 降 限 陕 陛 陝 陞 陟 陡 院 陣 除 陨 险 陪 陰 陲 陳 陵 陶 陷 陸 険 陽 隅 隆 隈 隊 隋 隍 階 随 隐 隔 隕 隘 隙 際 障 隠 隣 隧 隨 險 隱 隴 隶 隸 隻 隼 隽 难 雀 雁 雄 雅 集 雇 雉 雋 雌 雍 雎 雏 雑 雒 雕 雖 雙 雛 雜 雞 離 難 雨 雪 雯 雰 雲 雳 零 雷 雹 電 雾 需 霁 霄 霆 震 霈 霉 霊 霍 霎 霏 霑 霓 霖 霜 霞 霧 霭 霰 露 霸 霹 霽 霾 靂 靄 靈 青 靓 靖 静 靚 靛 靜 非 靠 靡 面 靥 靦 革 靳 靴 靶 靼 鞅 鞋 鞍 鞏 鞑 鞘 鞠 鞣 鞦 鞭 韆 韋 韌 韓 韜 韦 韧 韩 韬 韭 音 韵 韶 韻 響 頁 頂 頃 項 順 須 頌 預 頑 頒 頓 頗 領 頜 頡 頤 頫 頭 頰 頷 頸 頹 頻 頼 顆 題 額 顎 顏 顔 願 顛 類 顧 顫 顯 顱 顴 页 顶 顷 项 顺 须 顼 顽 顾 顿 颁 颂 预 颅 领 颇 颈 颉 颊 颌 颍 颐 频 颓 颔 颖 颗 题 颚 颛 颜 额 颞 颠 颡 颢 颤 颦 颧 風 颯 颱 颳 颶 颼 飄 飆 风 飒 飓 飕 飘 飙 飚 飛 飞 食 飢 飨 飩 飪 飯 飲 飼 飽 飾 餃 餅 餉 養 餌 餐 餒 餓 餘 餚 餛 餞 餡 館 餮 餵 餾 饅 饈 饋 饌 饍 饑 饒 饕 饗 饞 饥 饨 饪 饬 饭 饮 饯 饰 饱 饲 饴 饵 饶 饷 饺 饼 饽 饿 馀 馁 馄 馅 馆 馈 馋 馍 馏 馒 馔 首 馗 香 馥 馨 馬 馭 馮 馳 馴 駁 駄 駅 駆 駐 駒 駕 駛 駝 駭 駱 駿 騁 騎 騏 験 騙 騨 騰 騷 驀 驅 驊 驍 驒 驕 驗 驚 驛 驟 驢 驥 马 驭 驮 驯 驰 驱 驳 驴 驶 驷 驸 驹 驻 驼 驾 驿 骁 骂 骄 骅 骆 骇 骈 骊 骋 验 骏 骐 骑 骗 骚 骛 骜 骞 骠 骡 骤 骥 骧 骨 骯 骰 骶 骷 骸 骼 髂 髅 髋 髏 髒 髓 體 髖 高 髦 髪 髮 髯 髻 鬃 鬆 鬍 鬓 鬚 鬟 鬢 鬣 鬥 鬧 鬱 鬼 魁 魂 魄 魅 魇 魍 魏 魔 魘 魚 魯 魷 鮑 鮨 鮪 鮭 鮮 鯉 鯊 鯖 鯛 鯨 鯰 鯽 鰍 鰓 鰭 鰲 鰻 鰾 鱈 鱉 鱔 鱗 鱷 鱸 鱼 鱿 鲁 鲈 鲍 鲑 鲛 鲜 鲟 鲢 鲤 鲨 鲫 鲱 鲲 鲶 鲷 鲸 鳃 鳄 鳅 鳌 鳍 鳕 鳖 鳗 鳝 鳞 鳥 鳩 鳳 鳴 鳶 鴉 鴕 鴛 鴦 鴨 鴻 鴿 鵑 鵜 鵝 鵡 鵬 鵰 鵲 鶘 鶩 鶯 鶴 鷗 鷲 鷹 鷺 鸚 鸞 鸟 鸠 鸡 鸢 鸣 鸥 鸦 鸨 鸪 鸭 鸯 鸳 鸵 鸽 鸾 鸿 鹂 鹃 鹄 鹅 鹈 鹉 鹊 鹌 鹏 鹑 鹕 鹘 鹜 鹞 鹤 鹦 鹧 鹫 鹭 鹰 鹳 鹵 鹹 鹼 鹽 鹿 麂 麋 麒 麓 麗 麝 麟 麥 麦 麩 麴 麵 麸 麺 麻 麼 麽 麾 黃 黄 黍 黎 黏 黑 黒 黔 默 黛 黜 黝 點 黠 黨 黯 黴 鼋 鼎 鼐 鼓 鼠 鼬 鼹 鼻 鼾 齁 齊 齋 齐 齒 齡 齢 齣 齦 齿 龄 龅 龈 龊 龋 龌 龍 龐 龔 龕 龙 龚 龛 龜 龟 ︰ ︱ ︶ ︿ ﹁ ﹂ ﹍ ﹏ ﹐ ﹑ ﹒ ﹔ ﹕ ﹖ ﹗ ﹙ ﹚ ﹝ ﹞ ﹡ ﹣ ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ 。 「 」 、 ・ ッ ー イ ク シ ス ト ノ フ ラ ル ン ゙ ゚  ̄ ¥ 👍 🔥 😂 😎 ... 
yam 10 2017 12 11 2016 20 30 15 06 lofter ##s 2015 by 16 14 18 13 24 17 2014 21 ##0 22 19 25 23 com 100 00 05 2013 ##a 03 09 08 28 ##2 50 01 04 ##1 27 02 2012 ##3 26 ##e 07 ##8 ##5 ##6 ##4 ##9 ##7 29 2011 40 ##t 2010 ##o ##d ##i 2009 ##n app www the ##m 31 ##c ##l ##y ##r ##g 2008 60 http 200 qq ##p 80 ##f google pixnet 90 cookies tripadvisor 500 ##er ##k 35 ##h facebook 2007 2000 70 ##b of ##x ##u 45 300 iphone 32 1000 2006 48 ip 36 in 38 3d ##w ##ing 55 ctrip ##on ##v 33 ##の to 34 400 id 2005 it 37 windows llc top 99 42 39 000 led at ##an 41 51 52 46 49 43 53 44 ##z android 58 and 59 2004 56 vr ##か 5000 2003 47 blogthis twitter 54 ##le 150 ok 2018 57 75 cn no ios ##in ##mm ##00 800 on te 3000 65 2001 360 95 ig lv 120 ##ng ##を ##us ##に pc てす ── 600 ##te 85 2002 88 ##ed html ncc wifi email 64 blog is ##10 ##て mail online ##al dvd ##ic studio ##は ##℃ ##ia ##と line vip 72 ##q 98 ##ce ##en for ##is ##ra ##es ##j usb net cp 1999 asia 4g ##cm diy new 3c ##お ta 66 language vs apple tw 86 web ##ne ipad 62 you ##re 101 68 ##tion ps de bt pony atm ##2017 1998 67 ##ch ceo ##or go ##na av pro cafe 96 pinterest 97 63 pixstyleme3c ##ta more said ##2016 1997 mp3 700 ##ll nba jun ##20 92 tv 1995 pm 61 76 nbsp 250 ##ie linux ##ma cd 110 hd ##17 78 ##ion 77 6000 am ##th ##st 94 ##se ##et 69 180 gdp my 105 81 abc 89 flash 79 one 93 1990 1996 ##ck gps ##も ##ly web885 106 2020 91 ##ge 4000 1500 xd boss isbn 1994 org ##ry me love ##11 0fork 73 ##12 3g ##ter ##ar 71 82 ##la hotel 130 1970 pk 83 87 140 ie ##os ##30 ##el 74 ##50 seo cpu ##ml p2p 84 may ##る sun tue internet cc posted youtube ##at ##ン ##man ii ##ル ##15 abs nt pdf yahoo ago 1980 ##it news mac 104 ##てす ##me ##り java 1992 spa ##de ##nt hk all plus la 1993 ##mb ##16 ##ve west ##da 160 air ##い ##ps から ##to 1989 logo htc php https fi momo ##son sat ##ke ##80 ebd suv wi day apk ##88 ##um mv galaxy wiki or brake ##ス 1200 する this 1991 mon ##こ ❤2017 po ##ない javascript life home june ##ss system 900 ##ー ##0 pp 1988 world fb 4k br ##as 
ic ai leonardo safari ##60 live free xx wed win7 kiehl ##co lg o2o ##go us 235 1949 mm しい vfm kanye ##90 ##2015 ##id jr ##ey 123 rss ##sa ##ro ##am ##no thu fri 350 ##sh ##ki 103 comments name ##のて ##pe ##ine max 1987 8000 uber ##mi ##ton wordpress office 1986 1985 ##ment 107 bd win10 ##ld ##li gmail bb dior ##rs ##ri ##rd ##ます up cad ##® dr して read ##21 をお ##io ##99 url 1984 pvc paypal show policy ##40 ##ty ##18 with ##★ ##01 txt 102 ##ba dna from post mini ar taiwan john ##ga privacy agoda ##13 ##ny word ##24 ##22 ##by ##ur ##hz 1982 ##ang 265 cookie netscape 108 ##ka ##~ ##ad house share note ibm code hello nike sim survey ##016 1979 1950 wikia ##32 ##017 5g cbc ##tor ##kg 1983 ##rt ##14 campaign store 2500 os ##ct ##ts ##° 170 api ##ns 365 excel ##な ##ao ##ら ##し ~~ ##nd university 163 には 518 ##70 ##ya ##il ##25 pierre ipo 0020 897 ##23 hotels ##ian のお 125 years 6606 ##ers ##26 high ##day time ##ay bug ##line ##く ##す ##be xp talk2yam yamservice 10000 coco ##dy sony ##ies 1978 microsoft david people ##ha 1960 instagram intel その ##ot iso 1981 ##va 115 ##mo ##land xxx man co ltxsw ##ation baby 220 ##pa ##ol 1945 7000 tag 450 ##ue msn ##31 oppo ##ト ##ca control ##om st chrome ##ure ##ん be ##き lol ##19 した ##bo 240 lady ##100 ##way ##から 4600 ##ko ##do ##un 4s corporation 168 ##ni herme ##28 cp 978 ##up ##06 ui ##ds ppt admin three します bbc re 128 ##48 ca ##015 ##35 hp ##ee tpp ##た ##ive ×× root ##cc ##ました ##ble ##ity adobe park 114 et oled city ##ex ##ler ##ap china ##book 20000 view ##ice global ##km your hong ##mg out ##ms ng ebay ##29 menu ubuntu ##cy rom ##view open ktv do server ##lo if english ##ね ##5 ##oo 1600 ##02 step1 kong club 135 july inc 1976 mr hi ##net touch ##ls ##ii michael lcd ##05 ##33 phone james step2 1300 ios9 ##box dc ##2 ##ley samsung 111 280 pokemon css ##ent ##les いいえ ##1 s8 atom play bmw ##said sa etf ctrl ♥yoyo♥ ##55 2025 ##2014 ##66 adidas amazon 1958 ##ber ##ner visa ##77 ##der 1800 connectivity ##hi firefox 109 118 hr so style mark pop ol 
skip 1975 as ##27 ##ir ##61 190 mba ##う ##ai le ##ver 1900 cafe2017 lte super 113 129 ##ron amd like ##☆ are ##ster we ##sk paul data international ##ft longchamp ssd good ##ート ##ti reply ##my ↓↓↓ apr star ##ker source 136 js 112 get force photo ##one 126 ##2013 ##ow link bbs 1972 goods ##lin python 119 ##ip game ##ics ##ません blue ##● 520 ##45 page itunes ##03 1955 260 1968 gt gif 618 ##ff ##47 group くたさい about bar ganji ##nce music lee not 1977 1971 1973 ##per an faq comment ##って days ##ock 116 ##bs 1974 1969 v1 player 1956 xbox sql fm f1 139 ##ah 210 ##lv ##mp ##000 melody 1957 ##3 550 17life 199 1966 xml market ##au ##71 999 ##04 what gl ##95 ##age tips ##68 book ##ting mysql can 1959 230 ##ung wonderland watch 10℃ ##ction 9000 mar mobile 1946 1962 article ##db part ▲top party って 1967 1964 1948 ##07 ##ore ##op この dj ##78 ##38 010 main 225 1965 ##ong art 320 ad 134 020 ##73 117 pm2 japan 228 ##08 ts 1963 ##ica der sm ##36 2019 ##wa ct ##7 ##や ##64 1937 homemesh search ##85 ##れは ##tv ##di macbook ##9 ##くたさい service ##♥ type った 750 ##ier ##si ##75 ##います ##ok best ##ット goris lock ##った cf 3m big ##ut ftp carol ##vi 10 1961 happy sd ##ac 122 anti pe cnn iii 1920 138 ##ラ 1940 esp jan tags ##98 ##51 august vol ##86 154 ##™ ##fs ##れ ##sion design ac ##ム press jordan ppp that key check ##6 ##tt ##㎡ 1080p ##lt power ##42 1952 ##bc vivi ##ック he 133 121 jpg ##rry 201 175 3500 1947 nb ##ted ##rn しています 1954 usd ##t00 master ##ンク 001 model ##58 al ##09 1953 ##34 ram goo ても ##ui 127 1930 red ##ary rpg item ##pm ##41 270 ##za project ##2012 hot td blogabstract ##ger ##62 650 ##44 gr2 ##します ##m black electronic nfc year asus また html5 cindy ##hd m3 132 esc ##od booking ##53 fed tvb ##81 ##ina mit 165 ##いる chan 192 distribution next になる peter bios steam cm 1941 にも pk10 ##ix ##65 ##91 dec nasa ##ana icecat 00z b1 will ##46 li se ##ji ##み ##ard oct ##ain jp ##ze ##bi cio ##56 smart h5 ##39 ##port curve vpn ##nm ##dia utc ##あり 12345678910 ##52 rmvb chanel a4 miss ##and ##im media who 
##63 she girl 5s 124 vera ##して class vivo king ##フ ##ei national ab 1951 5cm 888 145 ipod ap 1100 5mm 211 ms 2756 ##69 mp4 msci ##po ##89 131 mg index 380 ##bit ##out ##zz ##97 ##67 158 apec ##8 photoshop opec ¥799 ては ##96 ##tes ##ast 2g ○○ ##ール ¥2899 ##ling ##よ ##ory 1938 ##ical kitty content ##43 step3 ##cn win8 155 vc 1400 iphone7 robert ##した tcl 137 beauty ##87 en dollars ##ys ##oc step pay yy a1 ##2011 ##lly ##ks ##♪ 1939 188 download 1944 sep exe ph います school gb center pr street ##board uv ##37 ##lan winrar ##que ##ua ##com 1942 1936 480 gpu ##4 ettoday fu tom ##54 ##ren ##via 149 ##72 b2b 144 ##79 ##tch rose arm mb ##49 ##ial ##nn nvidia step4 mvp 00㎡ york 156 ##イ how cpi 591 2765 gov kg joe ##xx mandy pa ##ser copyright fashion 1935 don ##け ecu ##ist ##art erp wap have ##lm talk ##ek ##ning ##if ch ##ite video 1943 cs san iot look ##84 ##2010 ##ku october ##ux trump ##hs ##ide box 141 first ##ins april ##ight ##83 185 angel protected aa 151 162 x1 m2 ##fe ##× ##ho size 143 min ofo fun gomaji ex hdmi food dns march chris kevin ##のか ##lla ##pp ##ec ag ems 6s 720p ##rm ##ham off ##92 asp team fandom ed 299 ▌♥ ##ell info されています ##82 sina 4066 161 ##able ##ctor 330 399 315 dll rights ltd idc jul 3kg 1927 142 ma surface ##76 ##ク ~~~ 304 mall eps 146 green ##59 map space donald v2 sodu ##light 1931 148 1700 まて 310 reserved htm ##han ##57 2d 178 mod ##ise ##tions 152 ti ##shi doc 1933 icp 055 wang ##ram shopping aug ##pi ##well now wam b2 からお ##hu 236 1928 ##gb 266 f2 ##93 153 mix ##ef ##uan bwl ##plus ##res core ##ess tea 5℃ hktvmall nhk ##ate list ##ese 301 feb 4m inn ての nov 159 12345 daniel ##ci pass ##bet ##nk coffee 202 ssl airbnb ##ute fbi woshipm skype ea cg sp ##fc ##www yes edge alt 007 ##94 fpga ##ght ##gs iso9001 さい ##ile ##wood ##uo image lin icon american ##em 1932 set says ##king ##tive blogger ##74 なと 256 147 ##ox ##zy ##red ##ium ##lf nokia claire ##リ ##ding november lohas ##500 ##tic ##マ ##cs ##ある ##che ##ire ##gy ##ult db january win ##カ 166 road 
ptt ##ま ##つ 198 ##fa ##mer anna pchome はい udn ef 420 ##time ##tte 2030 ##ア g20 white かかります 1929 308 garden eleven di ##おります chen 309b 777 172 young cosplay ちてない 4500 bat ##123 ##tra ##ては kindle npc steve etc ##ern ##| call xperia ces travel sk s7 ##ous 1934 ##int みいたたけます 183 edu file cho qr ##car ##our 186 ##ant ##d eric 1914 rends ##jo ##する mastercard ##2000 kb ##min 290 ##ino vista ##ris ##ud jack 2400 ##set 169 pos 1912 ##her ##ou taipei しく 205 beta ##ませんか 232 ##fi express 255 body ##ill aphojoy user december meiki ##ick tweet richard ##av ##ᆫ iphone6 ##dd ちてすか views ##mark 321 pd ##00 times ##▲ level ##ash 10g point 5l ##ome 208 koreanmall ##ak george q2 206 wma tcp ##200 スタッフ full mlb ##lle ##watch tm run 179 911 smith business ##und 1919 color ##tal 222 171 ##less moon 4399 ##rl update pcb shop 499 157 little なし end ##mhz van dsp easy 660 ##house ##key history ##o oh ##001 ##hy ##web oem let was ##2009 ##gg review ##wan 182 ##°c 203 uc title ##val united 233 2021 ##ons doi trivago overdope sbs ##ance ##ち grand special 573032185 imf 216 wx17house ##so ##ーム audi ##he london william ##rp ##ake science beach cfa amp ps4 880 ##800 ##link ##hp crm ferragamo bell make ##eng 195 under zh photos 2300 ##style ##ント via 176 da ##gi company i7 ##ray thomas 370 ufo i5 ##max plc ben back research 8g 173 mike ##pc ##ッフ september 189 ##ace vps february 167 pantos wp lisa 1921 ★★ jquery night long offer ##berg ##news 1911 ##いて ray fks wto せます over 164 340 ##all ##rus 1924 ##888 ##works blogtitle loftpermalink ##→ 187 martin test ling km ##め 15000 fda v3 ##ja ##ロ wedding かある outlet family ##ea をこ ##top story ##ness salvatore ##lu 204 swift 215 room している oracle ##ul 1925 sam b2c week pi rock ##のは ##a ##けと ##ean ##300 ##gle cctv after chinese ##back powered x2 ##tan 1918 ##nes ##イン canon only 181 ##zi ##las say ##oe 184 ##sd 221 ##bot ##world ##zo sky made top100 just 1926 pmi 802 234 gap ##vr 177 les 174 ▲topoct ball vogue vi ing ofweek cos ##list ##ort ▲topmay ##なら ##lon として 
last ##tc ##of ##bus ##gen real eva ##コ a3 nas ##lie ##ria ##coin ##bt ▲topapr his 212 cat nata vive health ⋯⋯ drive sir ▲topmar du cup ##カー ##ook ##よう ##sy alex msg tour しました 3ce ##word 193 ebooks r8 block 318 ##より 2200 nice pvp 207 months 1905 rewards ##ther 1917 0800 ##xi ##チ ##sc micro 850 gg blogfp op 1922 daily m1 264 true ##bb ml ##tar ##のお ##ky anthony 196 253 ##yo state 218 ##ara ##aa ##rc ##tz ##ston より gear ##eo ##ade ge see 1923 ##win ##ura ss heart ##den ##ita down ##sm el png 2100 610 rakuten whatsapp bay dream add ##use 680 311 pad gucci mpv ##ode ##fo island ▲topjun ##▼ 223 jason 214 chicago ##❤ しの ##hone io ##れる ##ことか sogo be2 ##ology 990 cloud vcd ##con 2~3 ##ford ##joy ##kb ##こさいます ##rade but ##ach docker ##ful rfid ul ##ase hit ford ##star 580 ##○ 11 a2 sdk reading edited ##are cmos ##mc 238 siri light ##ella ##ため bloomberg ##read pizza ##ison jimmy ##vm college node journal ba 18k ##play 245 ##cer 20 magic ##yu 191 jump 288 tt ##ings asr ##lia 3200 step5 network ##cd mc いします 1234 pixstyleme 273 ##600 2800 money ★★★★★ 1280 12 430 bl みの act ##tus tokyo ##rial ##life emba ##ae saas tcs ##rk ##wang summer ##sp ko ##ving 390 premium ##その netflix ##ヒ uk mt ##lton right frank two 209 える ##ple ##cal 021 ##んな ##sen ##ville hold nexus dd ##ius てお ##mah ##なく tila zero 820 ce ##tin resort ##ws charles old p10 5d report ##360 ##ru ##には bus vans lt ##est pv ##レ links rebecca ##ツ ##dm azure ##365 きな limited bit 4gb ##mon 1910 moto ##eam 213 1913 var eos なとの 226 blogspot された 699 e3 dos dm fc ##ments ##ik ##kw boy ##bin ##ata 960 er ##せ 219 ##vin ##tu ##ula 194 ##∥ station ##ろ ##ature 835 files zara hdr top10 nature 950 magazine s6 marriott ##シ avira case ##っと tab ##ran tony ##home oculus im ##ral jean saint cry 307 rosie ##force ##ini ice ##bert のある ##nder ##mber pet 2600 ##◆ plurk ▲topdec ##sis 00kg ▲topnov 720 ##ence tim ##ω ##nc ##ても ##name log ips great ikea malaysia unix ##イト 3600 ##ncy ##nie 12000 akb48 ##ye ##oid 404 ##chi ##いた oa xuehai ##1000 ##orm 
##rf 275 さん ##ware ##リー 980 ho ##pro text ##era 560 bob 227 ##ub ##2008 8891 scp avi ##zen 2022 mi wu museum qvod apache lake jcb ▲topaug ★★★ ni ##hr hill 302 ne weibo 490 ruby ##ーシ ##ヶ ##row 4d ▲topjul iv ##ish github 306 mate 312 ##スト ##lot ##ane andrew のハイト ##tina t1 rf ed2k ##vel ##900 way final りの ns 5a 705 197 ##メ sweet bytes ##ene ▲topjan 231 ##cker ##2007 ##px 100g topapp 229 helpapp rs low 14k g4g care 630 ldquo あり ##fork leave rm edition ##gan ##zon ##qq ▲topsep ##google ##ism gold 224 explorer ##zer toyota category select visual ##labels restaurant ##md posts s1 ##ico もっと angelababy 123456 217 sports s3 mbc 1915 してくたさい shell x86 candy ##new kbs face xl 470 ##here 4a swissinfo v8 ▲topfeb dram ##ual ##vice 3a ##wer sport q1 ios10 public int card ##c ep au rt ##れた 1080 bill ##mll kim 30 460 wan ##uk ##ミ x3 298 0t scott ##ming 239 e5 ##3d h7n9 worldcat brown ##あります ##vo ##led ##580 ##ax 249 410 ##ert paris ##~6 polo 925 ##lr 599 ##ナ capital ##hing bank cv 1g ##chat ##s ##たい adc ##ule 2m ##e digital hotmail 268 ##pad 870 bbq quot ##ring before wali ##まて mcu 2k 2b という costco 316 north 333 switch ##city ##p philips ##mann management panasonic ##cl ##vd ##ping ##rge alice ##lk ##ましょう css3 ##ney vision alpha ##ular ##400 ##tter lz にお ##ありません mode gre 1916 pci ##tm 237 1~2 ##yan ##そ について ##let ##キ work war coach ah mary ##ᅵ huang ##pt a8 pt follow ##berry 1895 ##ew a5 ghost ##ション ##wn ##og south ##code girls ##rid action villa git r11 table games ##cket error ##anonymoussaid ##ag here ##ame ##gc qa ##■ ##lis gmp ##gin vmalife ##cher yu wedding ##tis demo dragon 530 soho social bye ##rant river orz acer 325 ##↑ ##ース ##ats 261 del ##ven 440 ups ##ように ##ター 305 value macd yougou ##dn 661 ##ano ll ##urt ##rent continue script ##wen ##ect paper 263 319 shift ##chel ##フト ##cat 258 x5 fox 243 ##さん car aaa ##blog loading ##yn ##tp kuso 799 si sns イカせるテンマ ヒンクテンマ3 rmb vdc forest central prime help ultra ##rmb ##ような 241 square 688 ##しい のないフロクに ##field ##reen ##ors ##ju c1 
start 510 ##air ##map cdn ##wo cba stephen m8 100km ##get opera ##base ##ood vsa com™ ##aw ##ail 251 なのて count t2 ##ᅡ ##een 2700 hop ##gp vsc tree ##eg ##ose 816 285 ##ories ##shop alphago v4 1909 simon ##ᆼ fluke62max zip スホンサー ##sta louis cr bas ##~10 bc ##yer hadoop ##ube ##wi 1906 0755 hola ##low place centre 5v d3 ##fer 252 ##750 ##media 281 540 0l exchange 262 series ##ハー ##san eb ##bank ##k q3 ##nge ##mail take ##lp 259 1888 client east cache event vincent ##ールを きを ##nse sui 855 adchoice ##и ##stry ##なたの 246 ##zone ga apps sea ##ab 248 cisco ##タ ##rner kymco ##care dha ##pu ##yi minkoff royal p1 への annie 269 collection kpi playstation 257 になります 866 bh ##bar queen 505 radio 1904 andy armani ##xy manager iherb ##ery ##share spring raid johnson 1908 ##ob volvo hall ##ball v6 our taylor ##hk bi 242 ##cp kate bo water technology ##rie サイトは 277 ##ona ##sl hpv 303 gtx hip rdquo jayz stone ##lex ##rum namespace ##やり 620 ##ale ##atic des ##erson ##ql ##ves ##type enter ##この ##てきます d2 ##168 ##mix ##bian との a9 jj ky ##lc access movie ##hc リストに tower ##ration ##mit ます ##nch ua tel prefix ##o2 1907 ##point 1901 ott ~10 ##http ##ury baidu ##ink member ##logy bigbang nownews ##js ##shot ##tb ##こと 247 eba ##tics ##lus ける v5 spark ##ama there ##ions god ##lls ##down hiv ##ress burberry day2 ##kv ◆◆ jeff related film edit joseph 283 ##ark cx 32gb order g9 30000 ##ans ##tty s5 ##bee かあります thread xr buy sh 005 land spotify mx ##ari 276 ##verse ×email sf why ##ことて 244 7headlines nego sunny dom exo 401 666 positioning fit rgb ##tton 278 kiss alexa adam lp みリストを ##g mp ##ties ##llow amy ##du np 002 institute 271 ##rth ##lar 2345 590 ##des sidebar 15 imax site ##cky ##kit ##ime ##009 season 323 ##fun ##ンター ##ひ gogoro a7 pu lily fire twd600 ##ッセーシを いて ##vis 30ml ##cture ##をお information ##オ close friday ##くれる yi nick てすか ##tta ##tel 6500 ##lock cbd economy 254 かお 267 tinker double 375 8gb voice ##app oops channel today 985 ##right raw xyz ##+ jim edm ##cent 7500 supreme 814 ds ##its 
##asia dropbox ##てすか ##tti books 272 100ml ##tle ##ller ##ken ##more ##boy sex 309 ##dom t3 ##ider ##なります ##unch 1903 810 feel 5500 ##かった ##put により s2 mo ##gh men ka amoled div ##tr ##n1 port howard ##tags ken dnf ##nus adsense ##а ide ##へ buff thunder ##town ##ique has ##body auto pin ##erry tee てした 295 number ##the ##013 object psp cool udnbkk 16gb ##mic miui ##tro most r2 ##alk ##nity 1880 ±0 ##いました 428 s4 law version ##oa n1 sgs docomo ##tf ##ack henry fc2 ##ded ##sco ##014 ##rite 286 0mm linkedin ##ada ##now wii ##ndy ucbug ##◎ sputniknews legalminer ##ika ##xp 2gb ##bu q10 oo b6 come ##rman cheese ming maker ##gm nikon ##fig ppi kelly ##ります jchere てきます ted md 003 fgo tech ##tto dan soc ##gl ##len hair earth 640 521 img ##pper ##a1 ##てきる ##ロク acca ##ition ##ference suite ##ig outlook ##mond ##cation 398 ##pr 279 101vip 358 ##999 282 64gb 3800 345 airport ##over 284 ##おり jones ##ith lab ##su ##いるのて co2 town piece ##llo no1 vmware 24h ##qi focus reader ##admin ##ora tb false ##log 1898 know lan 838 ##ces f4 ##ume motel stop ##oper na flickr netcomponents ##af ##─ pose williams local ##ound ##cg ##site ##iko いお 274 5m gsm con ##ath 1902 friends ##hip cell 317 ##rey 780 cream ##cks 012 ##dp facebooktwitterpinterestgoogle sso 324 shtml song swiss ##mw ##キンク lumia xdd string tiffany 522 marc られた insee russell sc dell ##ations ok camera 289 ##vs ##flow ##late classic 287 ##nter stay g1 mtv 512 ##ever ##lab ##nger qe sata ryan d1 50ml cms ##cing su 292 3300 editor 296 ##nap security sunday association ##ens ##700 ##bra acg ##かり sofascore とは mkv ##ign jonathan gary build labels ##oto tesla moba qi gohappy general ajax 1024 ##かる サイト society ##test ##urs wps fedora ##ich mozilla 328 ##480 ##dr usa urn ##lina ##r grace ##die ##try ##ader 1250 ##なり elle 570 ##chen ##ᆯ price ##ten uhz ##ough eq ##hen states push session balance wow 506 ##cus ##py when ##ward ##ep 34e wong library prada ##サイト ##cle running ##ree 313 ck date q4 ##ctive ##ool ##> mk ##ira ##163 388 die secret 
rq dota buffet は1ヶ e6 ##ez pan 368 ha ##card ##cha 2a ##さ alan day3 eye f3 ##end france keep adi rna tvbs ##ala solo nova ##え ##tail ##ょう support ##ries ##なる ##ved base copy iis fps ##ways hero hgih profile fish mu ssh entertainment chang ##wd click cake ##ond pre ##tom kic pixel ##ov ##fl product 6a ##pd dear ##gate es yumi audio ##² ##sky echo bin where ##ture 329 ##ape find sap isis ##なと nand ##101 ##load ##ream band a6 525 never ##post festival 50cm ##we 555 guide 314 zenfone ##ike 335 gd forum jessica strong alexander ##ould software allen ##ious program 360° else lohasthree ##gar することかてきます please ##れます rc ##ggle ##ric bim 50000 ##own eclipse 355 brian 3ds ##side 061 361 ##other ##ける ##tech ##ator 485 engine ##ged ##t plaza ##fit cia ngo westbrook shi tbs 50mm ##みませんか sci 291 reuters ##ily contextlink ##hn af ##cil bridge very ##cel 1890 cambridge ##ize 15g ##aid ##data 790 frm ##head award butler ##sun meta ##mar america ps3 puma pmid ##すか lc 670 kitchen ##lic オーフン5 きなしソフトサーヒス そして day1 future ★★★★ ##text ##page ##rris pm1 ##ket fans ##っています 1001 christian bot kids trackback ##hai c3 display ##hl n2 1896 idea さんも ##sent airmail ##ug ##men pwm けます 028 ##lution 369 852 awards schemas 354 asics wikipedia font ##tional ##vy c2 293 ##れている ##dget ##ein っている contact pepper スキル 339 ##~5 294 ##uel ##ument 730 ##hang みてす q5 ##sue rain ##ndi wei swatch ##cept わせ 331 popular ##ste ##tag p2 501 trc 1899 ##west ##live justin honda ping messenger ##rap v9 543 ##とは unity appqq はすへて 025 leo ##tone ##テ ##ass uniqlo ##010 502 her jane memory moneydj ##tical human 12306 していると ##m2 coc miacare ##mn tmt ##core vim kk ##may fan target use too 338 435 2050 867 737 fast ##2c services ##ope omega energy ##わ pinkoi 1a ##なから ##rain jackson ##ement ##シャンルの 374 366 そんな p9 rd ##ᆨ 1111 ##tier ##vic zone ##│ 385 690 dl isofix cpa m4 322 kimi めて davis ##lay lulu ##uck 050 weeks qs ##hop 920 ##n ae ##ear ~5 eia 405 ##fly korea jpeg boost ##ship small ##リア 1860 eur 297 425 valley ##iel simple 
##ude rn k2 ##ena されます non patrick しているから ##ナー feed 5757 30g process well qqmei ##thing they aws lu pink ##ters ##kin または board ##vertisement wine ##ien unicode ##dge r1 359 ##tant いを ##twitter ##3c cool1 される ##れて ##l isp ##012 standard 45㎡2 402 ##150 matt ##fu 326 ##iner googlemsn pixnetfacebookyahoo ##ラン x7 886 ##uce メーカー sao ##ev ##きました ##file 9678 403 xddd shirt 6l ##rio ##hat 3mm givenchy ya bang ##lio monday crystal ロクイン ##abc 336 head 890 ubuntuforumwikilinuxpastechat ##vc ##~20 ##rity cnc 7866 ipv6 null 1897 ##ost yang imsean tiger ##fet ##ンス 352 ##= dji 327 ji maria ##come ##んて foundation 3100 ##beth ##なった 1m 601 active ##aft ##don 3p sr 349 emma ##khz living 415 353 1889 341 709 457 sas x6 ##face pptv x4 ##mate han sophie ##jing 337 fifa ##mand other sale inwedding ##gn てきちゃいます ##mmy ##pmlast bad nana nbc してみてくたさいね なとはお ##wu ##かあります ##あ note7 single ##340 せからこ してくたさい♪この しにはとんとんワークケートを するとあなたにもっとマッチした ならワークケートへ もみつかっちゃうかも ワークケートの ##bel window ##dio ##ht union age 382 14 ##ivity ##y コメント domain neo ##isa ##lter 5k f5 steven ##cts powerpoint tft self g2 ft ##テル zol ##act mwc 381 343 もう nbapop 408 てある eds ace ##room previous author tomtom il ##ets hu financial ☆☆☆ っています bp 5t chi 1gb ##hg fairmont cross 008 gay h2 function ##けて 356 also 1b 625 ##ータ ##raph 1894 3~5 ##ils i3 334 avenue ##host による ##bon ##tsu message navigation 50g fintech h6 ##ことを 8cm ##ject ##vas ##firm credit ##wf xxxx form ##nor ##space huawei plan json sbl ##dc machine 921 392 wish ##120 ##sol windows7 edward ##ために development washington ##nsis lo 818 ##sio ##ym ##bor planet ##~8 ##wt ieee gpa ##めて camp ann gm ##tw ##oka connect ##rss ##work ##atus wall chicken soul 2mm ##times fa ##ather ##cord 009 ##eep hitachi gui harry ##pan e1 disney ##press ##ーション wind 386 frigidaire ##tl liu hsu 332 basic von ev いた てきる スホンサーサイト learning ##ull expedia archives change ##wei santa cut ins 6gb turbo brand cf1 508 004 return 747 ##rip h1 ##nis ##をこ 128gb ##にお 3t application しており emc rx ##oon 384 quick 412 
15058 wilson wing chapter ##bug beyond ##cms ##dar ##oh zoom e2 trip sb ##nba rcep 342 aspx ci 080 gc gnu める ##count advanced dance dv ##url ##ging 367 8591 am09 shadow battle 346 ##i ##cia ##という emily ##のてす ##tation host ff techorz sars ##mini ##mporary ##ering nc 4200 798 ##next cma ##mbps ##gas ##ift ##dot ##ィ 455 ##~17 amana ##りの 426 ##ros ir 00㎡1 ##eet ##ible ##↓ 710 ˋ▽ˊ ##aka dcs iq ##v l1 ##lor maggie ##011 ##iu 588 ##~1 830 ##gt 1tb articles create ##burg ##iki database fantasy ##rex ##cam dlc dean ##you hard path gaming victoria maps cb ##lee ##itor overchicstoretvhome systems ##xt 416 p3 sarah 760 ##nan 407 486 x9 install second 626 ##ann ##ph ##rcle ##nic 860 ##nar ec ##とう 768 metro chocolate ##rian ~4 ##table ##しています skin ##sn 395 mountain ##0mm inparadise 6m 7x24 ib 4800 ##jia eeworld creative g5 g3 357 parker ecfa village からの 18000 sylvia サーヒス hbl ##ques ##onsored ##x2 ##きます ##v4 ##tein ie6 383 ##stack 389 ver ##ads ##baby sound bbe ##110 ##lone ##uid ads 022 gundam 351 thinkpad 006 scrum match ##ave mems ##470 ##oy ##なりました ##talk glass lamigo span ##eme job ##a5 jay wade kde 498 ##lace ocean tvg ##covery ##r3 ##ners ##rea junior think ##aine cover ##ision ##sia ↓↓ ##bow msi 413 458 406 ##love 711 801 soft z2 ##pl 456 1840 mobil mind ##uy 427 nginx ##oi めた ##rr 6221 ##mple ##sson ##ーシてす 371 ##nts 91tv comhd crv3000 ##uard 1868 397 deep lost field gallery ##bia rate spf redis traction 930 icloud 011 なら fe jose 372 ##tory into sohu fx 899 379 kicstart2 ##hia すく ##~3 ##sit ra 24 ##walk ##xure 500g ##pact pacific xa natural carlo ##250 ##walker 1850 ##can cto gigi 516 ##サー pen ##hoo ob matlab ##b ##yy 13913459 ##iti mango ##bbs sense c5 oxford ##ニア walker jennifer ##ola course ##bre 701 ##pus ##rder lucky 075 ##ぁ ivy なお ##nia sotheby side ##ugh joy ##orage ##ush ##bat ##dt 364 r9 ##2d ##gio 511 country wear ##lax ##~7 ##moon 393 seven study 411 348 lonzo 8k ##ェ evolution ##イフ ##kk gs kd ##レス arduino 344 b12 ##lux arpg ##rdon cook ##x5 dark five ##als 
##ida とても sign 362 ##ちの something 20mm ##nda 387 ##posted fresh tf 1870 422 cam ##mine ##skip ##form ##ssion education 394 ##tee dyson stage ##jie want ##night epson pack あります ##ppy テリヘル ##█ wd ##eh ##rence left ##lvin golden mhz discovery ##trix ##n2 loft ##uch ##dra ##sse speed ~1 1mdb sorry welcome ##urn wave gaga ##lmer teddy ##160 トラックハック せよ 611 ##f2016 378 rp ##sha rar ##あなたに ##きた 840 holiday ##ュー 373 074 ##vg ##nos ##rail gartner gi 6p ##dium kit 488 b3 eco ##ろう 20g sean ##stone autocad nu ##np f16 write 029 m5 ##ias images atp ##dk fsm 504 1350 ve 52kb ##xxx ##のに ##cake 414 unit lim ru 1v ##ification published angela 16g analytics ak ##q ##nel gmt ##icon again ##₂ ##bby ios11 445 かこさいます waze いてす ##ハ 9985 ##ust ##ティー framework ##007 iptv delete 52sykb cl wwdc 027 30cm ##fw ##ての 1389 ##xon brandt ##ses ##dragon tc vetements anne monte modern official ##へて ##ere ##nne ##oud もちろん 50 etnews ##a2 ##graphy 421 863 ##ちゃん 444 ##rtex ##てお l2 ##gma mount ccd たと archive morning tan ddos e7 ##ホ day4 ##ウ gis 453 its 495 factory bruce pg ##ito ってくたさい guest cdma ##lling 536 n3 しかし 3~4 mega eyes ro 13 women dac church ##jun singapore ##facebook 6991 starbucks ##tos ##stin ##shine zen ##mu tina 20℃ 1893 ##たけて 503 465 request ##gence qt ##っ 1886 347 363 q7 ##zzi diary ##tore 409 ##ead 468 cst ##osa canada agent va ##jiang ##ちは ##ーク ##lam sg ##nix ##sday ##よって g6 ##master bing ##zl charlie 16 8mm nb40 ##ーン thai ##ルフ ln284ct ##itz ##2f bonnie ##food ##lent originals ##stro ##lts 418 ∟∣ ##bscribe children ntd yesstyle ##かも hmv ##tment d5 2cm arts sms ##pn ##я ##いい topios9 539 lifestyle virtual ##ague xz ##deo muji 024 unt ##nnis ##ᅩ faq1 1884 396 ##ette fly 64㎡ はしめまして 441 curry ##pop のこ release ##← ##◆◆ ##cast 073 ありな 500ml ##ews 5c ##stle ios7 ##ima 787 dog lenovo ##r4 roger 013 cbs vornado 100m 417 ##desk ##クok ##ald 1867 9595 2900 ##van oil ##x some break common ##jy ##lines g7 twice 419 ella nano belle にこ ##mes ##self ##note jb ##ことかてきます benz ##との ##ova 451 save ##wing 
##ますのて kai りは ##hua ##rect rainer ##unge 448 ##0m adsl ##かな guestname ##uma ##kins ##zu tokichoi ##price county ##med ##mus rmk 391 address vm えて openload ##group ##hin ##iginal amg urban ##oz jobs emi ##public beautiful ##sch album ##dden ##bell jerry works hostel miller ##drive ##rmin ##10 376 boot 828 ##370 ##fx ##cm~ 1885 ##nome ##ctionary ##oman ##lish ##cr ##hm 433 ##how 432 francis xi c919 b5 evernote ##uc vga ##3000 coupe ##urg ##cca ##uality 019 6g れる multi ##また ##ett em hey ##ani ##tax ##rma inside than 740 leonnhurt ##jin ict れた bird notes 200mm くの ##dical ##lli result 442 iu ee 438 smap gopro ##last yin pure 998 32g けた 5kg ##dan ##rame mama ##oot bean marketing ##hur 2l bella sync xuite ##ground 515 discuz ##getrelax ##ince ##bay ##5s cj ##イス gmat apt ##pass jing ##rix c4 rich ##とても niusnews ##ello bag 770 ##eting ##mobile 18 culture 015 ##のてすか 377 1020 area ##ience 616 details gp universal silver dit はお private ddd u11 kanshu ##ified fung ##nny dx ##520 tai 475 023 ##fr ##lean 3s ##pin 429 ##rin 25000 ly rick ##bility usb3 banner ##baru ##gion metal dt vdf 1871 karl qualcomm bear 1010 oldid ian jo ##tors population ##ernel 1882 mmorpg ##mv ##bike 603 ##© ww friend ##ager exhibition ##del ##pods fpx structure ##free ##tings kl ##rley ##copyright ##mma california 3400 orange yoga 4l canmake honey ##anda ##コメント 595 nikkie ##ルハイト dhl publishing ##mall ##gnet 20cm 513 ##クセス ##┅ e88 970 ##dog fishbase ##! ##" ### ##$ ##% ##& ##' ##( ##) ##* ##+ ##, ##- ##. ##/ ##: ##; ##< ##= ##> ##? 
##@ ##[ ##\ ##] ##^ ##_ ##{ ##| ##} ##~ ##£ ##¤ ##¥ ##§ ##« ##± ##³ ##µ ##· ##¹ ##º ##» ##¼ ##ß ##æ ##÷ ##ø ##đ ##ŋ ##ɔ ##ə ##ɡ ##ʰ ##ˇ ##ˈ ##ˊ ##ˋ ##ˍ ##ː ##˙ ##˚ ##ˢ ##α ##β ##γ ##δ ##ε ##η ##θ ##ι ##κ ##λ ##μ ##ν ##ο ##π ##ρ ##ς ##σ ##τ ##υ ##φ ##χ ##ψ ##б ##в ##г ##д ##е ##ж ##з ##к ##л ##м ##н ##о ##п ##р ##с ##т ##у ##ф ##х ##ц ##ч ##ш ##ы ##ь ##і ##ا ##ب ##ة ##ت ##د ##ر ##س ##ع ##ل ##م ##ن ##ه ##و ##ي ##۩ ##ก ##ง ##น ##ม ##ย ##ร ##อ ##า ##เ ##๑ ##་ ##ღ ##ᄀ ##ᄁ ##ᄂ ##ᄃ ##ᄅ ##ᄆ ##ᄇ ##ᄈ ##ᄉ ##ᄋ ##ᄌ ##ᄎ ##ᄏ ##ᄐ ##ᄑ ##ᄒ ##ᅢ ##ᅣ ##ᅥ ##ᅦ ##ᅧ ##ᅨ ##ᅪ ##ᅬ ##ᅭ ##ᅮ ##ᅯ ##ᅲ ##ᅳ ##ᅴ ##ᆷ ##ᆸ ##ᆺ ##ᆻ ##ᗜ ##ᵃ ##ᵉ ##ᵍ ##ᵏ ##ᵐ ##ᵒ ##ᵘ ##‖ ##„ ##† ##• ##‥ ##‧ ##
 ##‰ ##′ ##″ ##‹ ##› ##※ ##‿ ##⁄ ##ⁱ ##⁺ ##ⁿ ##₁ ##₃ ##₄ ##€ ##№ ##ⅰ ##ⅱ ##ⅲ ##ⅳ ##ⅴ ##↔ ##↗ ##↘ ##⇒ ##∀ ##− ##∕ ##∙ ##√ ##∞ ##∟ ##∠ ##∣ ##∩ ##∮ ##∶ ##∼ ##∽ ##≈ ##≒ ##≡ ##≤ ##≥ ##≦ ##≧ ##≪ ##≫ ##⊙ ##⋅ ##⋈ ##⋯ ##⌒ ##① ##② ##③ ##④ ##⑤ ##⑥ ##⑦ ##⑧ ##⑨ ##⑩ ##⑴ ##⑵ ##⑶ ##⑷ ##⑸ ##⒈ ##⒉ ##⒊ ##⒋ ##ⓒ ##ⓔ ##ⓘ ##━ ##┃ ##┆ ##┊ ##┌ ##└ ##├ ##┣ ##═ ##║ ##╚ ##╞ ##╠ ##╭ ##╮ ##╯ ##╰ ##╱ ##╳ ##▂ ##▃ ##▅ ##▇ ##▉ ##▋ ##▌ ##▍ ##▎ ##□ ##▪ ##▫ ##▬ ##△ ##▶ ##► ##▽ ##◇ ##◕ ##◠ ##◢ ##◤ ##☀ ##☕ ##☞ ##☺ ##☼ ##♀ ##♂ ##♠ ##♡ ##♣ ##♦ ##♫ ##♬ ##✈ ##✔ ##✕ ##✖ ##✦ ##✨ ##✪ ##✰ ##✿ ##❀ ##➜ ##➤ ##⦿ ##、 ##。 ##〃 ##々 ##〇 ##〈 ##〉 ##《 ##》 ##「 ##」 ##『 ##』 ##【 ##】 ##〓 ##〔 ##〕 ##〖 ##〗 ##〜 ##〝 ##〞 ##ぃ ##ぇ ##ぬ ##ふ ##ほ ##む ##ゃ ##ゅ ##ゆ ##ょ ##゜ ##ゝ ##ァ ##ゥ ##エ ##ォ ##ケ ##サ ##セ ##ソ ##ッ ##ニ ##ヌ ##ネ ##ノ ##ヘ ##モ ##ャ ##ヤ ##ュ ##ユ ##ョ ##ヨ ##ワ ##ヲ ##・ ##ヽ ##ㄅ ##ㄆ ##ㄇ ##ㄉ ##ㄋ ##ㄌ ##ㄍ ##ㄎ ##ㄏ ##ㄒ ##ㄚ ##ㄛ ##ㄞ ##ㄟ ##ㄢ ##ㄤ ##ㄥ ##ㄧ ##ㄨ ##ㆍ ##㈦ ##㊣ ##㗎 ##一 ##丁 ##七 ##万 ##丈 ##三 ##上 ##下 ##不 ##与 ##丐 ##丑 ##专 ##且 ##丕 ##世 ##丘 ##丙 ##业 ##丛 ##东 ##丝 ##丞 ##丟 ##両 ##丢 ##两 ##严 ##並 ##丧 ##丨 ##个 ##丫 ##中 ##丰 ##串 ##临 ##丶 ##丸 ##丹 ##为 ##主 ##丼 ##丽 ##举 ##丿 ##乂 ##乃 ##久 ##么 ##义 ##之 ##乌 ##乍 ##乎 ##乏 ##乐 ##乒 ##乓 ##乔 ##乖 ##乗 ##乘 ##乙 ##乜 ##九 ##乞 ##也 ##习 ##乡 ##书 ##乩 ##买 ##乱 ##乳 ##乾 ##亀 ##亂 ##了 ##予 ##争 ##事 ##二 ##于 ##亏 ##云 ##互 ##五 ##井 ##亘 ##亙 ##亚 ##些 ##亜 ##亞 ##亟 ##亡 ##亢 ##交 ##亥 ##亦 ##产 ##亨 ##亩 ##享 ##京 ##亭 ##亮 ##亲 ##亳 ##亵 ##人 ##亿 ##什 ##仁 ##仃 ##仄 ##仅 ##仆 ##仇 ##今 ##介 ##仍 ##从 ##仏 ##仑 ##仓 ##仔 ##仕 ##他 ##仗 ##付 ##仙 ##仝 ##仞 ##仟 ##代 ##令 ##以 ##仨 ##仪 ##们 ##仮 ##仰 ##仲 ##件 ##价 ##任 ##份 ##仿 ##企 ##伉 ##伊 ##伍 ##伎 ##伏 ##伐 ##休 ##伕 ##众 ##优 ##伙 ##会 ##伝 ##伞 ##伟 ##传 ##伢 ##伤 ##伦 ##伪 ##伫 ##伯 ##估 ##伴 ##伶 ##伸 ##伺 ##似 ##伽 ##佃 ##但 ##佇 ##佈 ##位 ##低 ##住 ##佐 ##佑 ##体 ##佔 ##何 ##佗 ##佘 ##余 ##佚 ##佛 ##作 ##佝 ##佞 ##佟 ##你 ##佢 ##佣 ##佤 ##佥 ##佩 ##佬 ##佯 ##佰 ##佳 ##併 ##佶 ##佻 ##佼 ##使 ##侃 ##侄 ##來 ##侈 ##例 ##侍 ##侏 ##侑 ##侖 ##侗 ##供 ##依 ##侠 ##価 ##侣 ##侥 ##侦 ##侧 ##侨 ##侬 ##侮 ##侯 ##侵 ##侶 ##侷 ##便 ##係 ##促 ##俄 ##俊 ##俎 ##俏 ##俐 ##俑 ##俗 ##俘 ##俚 ##保 ##俞 ##俟 ##俠 ##信 ##俨 ##俩 ##俪 ##俬 ##俭 ##修 ##俯 ##俱 ##俳 ##俸 ##俺 ##俾 ##倆 ##倉 ##個 ##倌 
##倍 ##倏 ##們 ##倒 ##倔 ##倖 ##倘 ##候 ##倚 ##倜 ##借 ##倡 ##値 ##倦 ##倩 ##倪 ##倫 ##倬 ##倭 ##倶 ##债 ##值 ##倾 ##偃 ##假 ##偈 ##偉 ##偌 ##偎 ##偏 ##偕 ##做 ##停 ##健 ##側 ##偵 ##偶 ##偷 ##偻 ##偽 ##偿 ##傀 ##傅 ##傍 ##傑 ##傘 ##備 ##傚 ##傢 ##傣 ##傥 ##储 ##傩 ##催 ##傭 ##傲 ##傳 ##債 ##傷 ##傻 ##傾 ##僅 ##働 ##像 ##僑 ##僕 ##僖 ##僚 ##僥 ##僧 ##僭 ##僮 ##僱 ##僵 ##價 ##僻 ##儀 ##儂 ##億 ##儆 ##儉 ##儋 ##儒 ##儕 ##儘 ##償 ##儡 ##優 ##儲 ##儷 ##儼 ##儿 ##兀 ##允 ##元 ##兄 ##充 ##兆 ##兇 ##先 ##光 ##克 ##兌 ##免 ##児 ##兑 ##兒 ##兔 ##兖 ##党 ##兜 ##兢 ##入 ##內 ##全 ##兩 ##八 ##公 ##六 ##兮 ##兰 ##共 ##兲 ##关 ##兴 ##兵 ##其 ##具 ##典 ##兹 ##养 ##兼 ##兽 ##冀 ##内 ##円 ##冇 ##冈 ##冉 ##冊 ##册 ##再 ##冏 ##冒 ##冕 ##冗 ##写 ##军 ##农 ##冠 ##冢 ##冤 ##冥 ##冨 ##冪 ##冬 ##冯 ##冰 ##冲 ##决 ##况 ##冶 ##冷 ##冻 ##冼 ##冽 ##冾 ##净 ##凄 ##准 ##凇 ##凈 ##凉 ##凋 ##凌 ##凍 ##减 ##凑 ##凛 ##凜 ##凝 ##几 ##凡 ##凤 ##処 ##凪 ##凭 ##凯 ##凰 ##凱 ##凳 ##凶 ##凸 ##凹 ##出 ##击 ##函 ##凿 ##刀 ##刁 ##刃 ##分 ##切 ##刈 ##刊 ##刍 ##刎 ##刑 ##划 ##列 ##刘 ##则 ##刚 ##创 ##初 ##删 ##判 ##別 ##刨 ##利 ##刪 ##别 ##刮 ##到 ##制 ##刷 ##券 ##刹 ##刺 ##刻 ##刽 ##剁 ##剂 ##剃 ##則 ##剉 ##削 ##剋 ##剌 ##前 ##剎 ##剐 ##剑 ##剔 ##剖 ##剛 ##剜 ##剝 ##剣 ##剤 ##剥 ##剧 ##剩 ##剪 ##副 ##割 ##創 ##剷 ##剽 ##剿 ##劃 ##劇 ##劈 ##劉 ##劊 ##劍 ##劏 ##劑 ##力 ##劝 ##办 ##功 ##加 ##务 ##劣 ##动 ##助 ##努 ##劫 ##劭 ##励 ##劲 ##劳 ##労 ##劵 ##効 ##劾 ##势 ##勁 ##勃 ##勇 ##勉 ##勋 ##勐 ##勒 ##動 ##勖 ##勘 ##務 ##勛 ##勝 ##勞 ##募 ##勢 ##勤 ##勧 ##勳 ##勵 ##勸 ##勺 ##勻 ##勾 ##勿 ##匀 ##包 ##匆 ##匈 ##匍 ##匐 ##匕 ##化 ##北 ##匙 ##匝 ##匠 ##匡 ##匣 ##匪 ##匮 ##匯 ##匱 ##匹 ##区 ##医 ##匾 ##匿 ##區 ##十 ##千 ##卅 ##升 ##午 ##卉 ##半 ##卍 ##华 ##协 ##卑 ##卒 ##卓 ##協 ##单 ##卖 ##南 ##単 ##博 ##卜 ##卞 ##卟 ##占 ##卡 ##卢 ##卤 ##卦 ##卧 ##卫 ##卮 ##卯 ##印 ##危 ##即 ##却 ##卵 ##卷 ##卸 ##卻 ##卿 ##厂 ##厄 ##厅 ##历 ##厉 ##压 ##厌 ##厕 ##厘 ##厚 ##厝 ##原 ##厢 ##厥 ##厦 ##厨 ##厩 ##厭 ##厮 ##厲 ##厳 ##去 ##县 ##叁 ##参 ##參 ##又 ##叉 ##及 ##友 ##双 ##反 ##収 ##发 ##叔 ##取 ##受 ##变 ##叙 ##叛 ##叟 ##叠 ##叡 ##叢 ##口 ##古 ##句 ##另 ##叨 ##叩 ##只 ##叫 ##召 ##叭 ##叮 ##可 ##台 ##叱 ##史 ##右 ##叵 ##叶 ##号 ##司 ##叹 ##叻 ##叼 ##叽 ##吁 ##吃 ##各 ##吆 ##合 ##吉 ##吊 ##吋 ##同 ##名 ##后 ##吏 ##吐 ##向 ##吒 ##吓 ##吕 ##吖 ##吗 ##君 ##吝 ##吞 ##吟 ##吠 ##吡 ##否 ##吧 ##吨 ##吩 ##含 ##听 ##吭 ##吮 ##启 ##吱 ##吳 ##吴 ##吵 ##吶 ##吸 ##吹 ##吻 ##吼 ##吽 ##吾 ##呀 ##呂 ##呃 ##呆 ##呈 ##告 ##呋 ##呎 ##呐 ##呓 
##呕 ##呗 ##员 ##呛 ##呜 ##呢 ##呤 ##呦 ##周 ##呱 ##呲 ##味 ##呵 ##呷 ##呸 ##呻 ##呼 ##命 ##咀 ##咁 ##咂 ##咄 ##咆 ##咋 ##和 ##咎 ##咏 ##咐 ##咒 ##咔 ##咕 ##咖 ##咗 ##咘 ##咙 ##咚 ##咛 ##咣 ##咤 ##咦 ##咧 ##咨 ##咩 ##咪 ##咫 ##咬 ##咭 ##咯 ##咱 ##咲 ##咳 ##咸 ##咻 ##咽 ##咿 ##哀 ##品 ##哂 ##哄 ##哆 ##哇 ##哈 ##哉 ##哋 ##哌 ##响 ##哎 ##哏 ##哐 ##哑 ##哒 ##哔 ##哗 ##哟 ##員 ##哥 ##哦 ##哧 ##哨 ##哩 ##哪 ##哭 ##哮 ##哲 ##哺 ##哼 ##哽 ##唁 ##唄 ##唆 ##唇 ##唉 ##唏 ##唐 ##唑 ##唔 ##唠 ##唤 ##唧 ##唬 ##售 ##唯 ##唰 ##唱 ##唳 ##唷 ##唸 ##唾 ##啃 ##啄 ##商 ##啉 ##啊 ##問 ##啓 ##啕 ##啖 ##啜 ##啞 ##啟 ##啡 ##啤 ##啥 ##啦 ##啧 ##啪 ##啫 ##啬 ##啮 ##啰 ##啱 ##啲 ##啵 ##啶 ##啷 ##啸 ##啻 ##啼 ##啾 ##喀 ##喂 ##喃 ##善 ##喆 ##喇 ##喉 ##喊 ##喋 ##喎 ##喏 ##喔 ##喘 ##喙 ##喚 ##喜 ##喝 ##喟 ##喧 ##喪 ##喫 ##喬 ##單 ##喰 ##喱 ##喲 ##喳 ##喵 ##営 ##喷 ##喹 ##喺 ##喻 ##喽 ##嗅 ##嗆 ##嗇 ##嗎 ##嗑 ##嗒 ##嗓 ##嗔 ##嗖 ##嗚 ##嗜 ##嗝 ##嗟 ##嗡 ##嗣 ##嗤 ##嗦 ##嗨 ##嗪 ##嗬 ##嗯 ##嗰 ##嗲 ##嗳 ##嗶 ##嗷 ##嗽 ##嘀 ##嘅 ##嘆 ##嘈 ##嘉 ##嘌 ##嘍 ##嘎 ##嘔 ##嘖 ##嘗 ##嘘 ##嘚 ##嘛 ##嘜 ##嘞 ##嘟 ##嘢 ##嘣 ##嘤 ##嘧 ##嘩 ##嘭 ##嘮 ##嘯 ##嘰 ##嘱 ##嘲 ##嘴 ##嘶 ##嘸 ##嘹 ##嘻 ##嘿 ##噁 ##噌 ##噎 ##噓 ##噔 ##噗 ##噙 ##噜 ##噠 ##噢 ##噤 ##器 ##噩 ##噪 ##噬 ##噱 ##噴 ##噶 ##噸 ##噹 ##噻 ##噼 ##嚀 ##嚇 ##嚎 ##嚏 ##嚐 ##嚓 ##嚕 ##嚟 ##嚣 ##嚥 ##嚨 ##嚮 ##嚴 ##嚷 ##嚼 ##囂 ##囉 ##囊 ##囍 ##囑 ##囔 ##囗 ##囚 ##四 ##囝 ##回 ##囟 ##因 ##囡 ##团 ##団 ##囤 ##囧 ##囪 ##囫 ##园 ##困 ##囱 ##囲 ##図 ##围 ##囹 ##固 ##国 ##图 ##囿 ##圃 ##圄 ##圆 ##圈 ##國 ##圍 ##圏 ##園 ##圓 ##圖 ##團 ##圜 ##土 ##圣 ##圧 ##在 ##圩 ##圭 ##地 ##圳 ##场 ##圻 ##圾 ##址 ##坂 ##均 ##坊 ##坍 ##坎 ##坏 ##坐 ##坑 ##块 ##坚 ##坛 ##坝 ##坞 ##坟 ##坠 ##坡 ##坤 ##坦 ##坨 ##坪 ##坯 ##坳 ##坵 ##坷 ##垂 ##垃 ##垄 ##型 ##垒 ##垚 ##垛 ##垠 ##垢 ##垣 ##垦 ##垩 ##垫 ##垭 ##垮 ##垵 ##埂 ##埃 ##埋 ##城 ##埔 ##埕 ##埗 ##域 ##埠 ##埤 ##埵 ##執 ##埸 ##培 ##基 ##埼 ##堀 ##堂 ##堃 ##堅 ##堆 ##堇 ##堑 ##堕 ##堙 ##堡 ##堤 ##堪 ##堯 ##堰 ##報 ##場 ##堵 ##堺 ##堿 ##塊 ##塌 ##塑 ##塔 ##塗 ##塘 ##塚 ##塞 ##塢 ##塩 ##填 ##塬 ##塭 ##塵 ##塾 ##墀 ##境 ##墅 ##墉 ##墊 ##墒 ##墓 ##増 ##墘 ##墙 ##墜 ##增 ##墟 ##墨 ##墩 ##墮 ##墳 ##墻 ##墾 ##壁 ##壅 ##壆 ##壇 ##壊 ##壑 ##壓 ##壕 ##壘 ##壞 ##壟 ##壢 ##壤 ##壩 ##士 ##壬 ##壮 ##壯 ##声 ##売 ##壳 ##壶 ##壹 ##壺 ##壽 ##处 ##备 ##変 ##复 ##夏 ##夔 ##夕 ##外 ##夙 ##多 ##夜 ##够 ##夠 ##夢 ##夥 ##大 ##天 ##太 ##夫 ##夭 ##央 ##夯 ##失 ##头 ##夷 ##夸 ##夹 ##夺 ##夾 ##奂 ##奄 ##奇 ##奈 ##奉 ##奋 ##奎 ##奏 ##奐 ##契 ##奔 
##奕 ##奖 ##套 ##奘 ##奚 ##奠 ##奢 ##奥 ##奧 ##奪 ##奬 ##奮 ##女 ##奴 ##奶 ##奸 ##她 ##好 ##如 ##妃 ##妄 ##妆 ##妇 ##妈 ##妊 ##妍 ##妒 ##妓 ##妖 ##妘 ##妙 ##妝 ##妞 ##妣 ##妤 ##妥 ##妨 ##妩 ##妪 ##妮 ##妲 ##妳 ##妹 ##妻 ##妾 ##姆 ##姉 ##姊 ##始 ##姍 ##姐 ##姑 ##姒 ##姓 ##委 ##姗 ##姚 ##姜 ##姝 ##姣 ##姥 ##姦 ##姨 ##姪 ##姫 ##姬 ##姹 ##姻 ##姿 ##威 ##娃 ##娄 ##娅 ##娆 ##娇 ##娉 ##娑 ##娓 ##娘 ##娛 ##娜 ##娟 ##娠 ##娣 ##娥 ##娩 ##娱 ##娲 ##娴 ##娶 ##娼 ##婀 ##婁 ##婆 ##婉 ##婊 ##婕 ##婚 ##婢 ##婦 ##婧 ##婪 ##婭 ##婴 ##婵 ##婶 ##婷 ##婺 ##婿 ##媒 ##媚 ##媛 ##媞 ##媧 ##媲 ##媳 ##媽 ##媾 ##嫁 ##嫂 ##嫉 ##嫌 ##嫑 ##嫔 ##嫖 ##嫘 ##嫚 ##嫡 ##嫣 ##嫦 ##嫩 ##嫲 ##嫵 ##嫻 ##嬅 ##嬉 ##嬌 ##嬗 ##嬛 ##嬢 ##嬤 ##嬪 ##嬰 ##嬴 ##嬷 ##嬸 ##嬿 ##孀 ##孃 ##子 ##孑 ##孔 ##孕 ##孖 ##字 ##存 ##孙 ##孚 ##孛 ##孜 ##孝 ##孟 ##孢 ##季 ##孤 ##学 ##孩 ##孪 ##孫 ##孬 ##孰 ##孱 ##孳 ##孵 ##學 ##孺 ##孽 ##孿 ##宁 ##它 ##宅 ##宇 ##守 ##安 ##宋 ##完 ##宏 ##宓 ##宕 ##宗 ##官 ##宙 ##定 ##宛 ##宜 ##宝 ##实 ##実 ##宠 ##审 ##客 ##宣 ##室 ##宥 ##宦 ##宪 ##宫 ##宮 ##宰 ##害 ##宴 ##宵 ##家 ##宸 ##容 ##宽 ##宾 ##宿 ##寂 ##寄 ##寅 ##密 ##寇 ##富 ##寐 ##寒 ##寓 ##寛 ##寝 ##寞 ##察 ##寡 ##寢 ##寥 ##實 ##寧 ##寨 ##審 ##寫 ##寬 ##寮 ##寰 ##寵 ##寶 ##寸 ##对 ##寺 ##寻 ##导 ##対 ##寿 ##封 ##専 ##射 ##将 ##將 ##專 ##尉 ##尊 ##尋 ##對 ##導 ##小 ##少 ##尔 ##尕 ##尖 ##尘 ##尚 ##尝 ##尤 ##尧 ##尬 ##就 ##尴 ##尷 ##尸 ##尹 ##尺 ##尻 ##尼 ##尽 ##尾 ##尿 ##局 ##屁 ##层 ##屄 ##居 ##屆 ##屈 ##屉 ##届 ##屋 ##屌 ##屍 ##屎 ##屏 ##屐 ##屑 ##展 ##屜 ##属 ##屠 ##屡 ##屢 ##層 ##履 ##屬 ##屯 ##山 ##屹 ##屿 ##岀 ##岁 ##岂 ##岌 ##岐 ##岑 ##岔 ##岖 ##岗 ##岘 ##岙 ##岚 ##岛 ##岡 ##岩 ##岫 ##岬 ##岭 ##岱 ##岳 ##岷 ##岸 ##峇 ##峋 ##峒 ##峙 ##峡 ##峤 ##峥 ##峦 ##峨 ##峪 ##峭 ##峯 ##峰 ##峴 ##島 ##峻 ##峽 ##崁 ##崂 ##崆 ##崇 ##崎 ##崑 ##崔 ##崖 ##崗 ##崙 ##崛 ##崧 ##崩 ##崭 ##崴 ##崽 ##嵇 ##嵊 ##嵋 ##嵌 ##嵐 ##嵘 ##嵩 ##嵬 ##嵯 ##嶂 ##嶄 ##嶇 ##嶋 ##嶙 ##嶺 ##嶼 ##嶽 ##巅 ##巍 ##巒 ##巔 ##巖 ##川 ##州 ##巡 ##巢 ##工 ##左 ##巧 ##巨 ##巩 ##巫 ##差 ##己 ##已 ##巳 ##巴 ##巷 ##巻 ##巽 ##巾 ##巿 ##币 ##市 ##布 ##帅 ##帆 ##师 ##希 ##帐 ##帑 ##帕 ##帖 ##帘 ##帚 ##帛 ##帜 ##帝 ##帥 ##带 ##帧 ##師 ##席 ##帮 ##帯 ##帰 ##帳 ##帶 ##帷 ##常 ##帼 ##帽 ##幀 ##幂 ##幄 ##幅 ##幌 ##幔 ##幕 ##幟 ##幡 ##幢 ##幣 ##幫 ##干 ##平 ##年 ##并 ##幸 ##幹 ##幺 ##幻 ##幼 ##幽 ##幾 ##广 ##庁 ##広 ##庄 ##庆 ##庇 ##床 ##序 ##庐 ##库 ##应 ##底 ##庖 ##店 ##庙 ##庚 ##府 ##庞 ##废 ##庠 ##度 ##座 ##庫 ##庭 ##庵 ##庶 ##康 ##庸 ##庹 ##庾 ##廁 ##廂 ##廃 ##廈 ##廉 ##廊 ##廓 
##廖 ##廚 ##廝 ##廟 ##廠 ##廢 ##廣 ##廬 ##廳 ##延 ##廷 ##建 ##廿 ##开 ##弁 ##异 ##弃 ##弄 ##弈 ##弊 ##弋 ##式 ##弑 ##弒 ##弓 ##弔 ##引 ##弗 ##弘 ##弛 ##弟 ##张 ##弥 ##弦 ##弧 ##弩 ##弭 ##弯 ##弱 ##張 ##強 ##弹 ##强 ##弼 ##弾 ##彅 ##彆 ##彈 ##彌 ##彎 ##归 ##当 ##录 ##彗 ##彙 ##彝 ##形 ##彤 ##彥 ##彦 ##彧 ##彩 ##彪 ##彫 ##彬 ##彭 ##彰 ##影 ##彷 ##役 ##彻 ##彼 ##彿 ##往 ##征 ##径 ##待 ##徇 ##很 ##徉 ##徊 ##律 ##後 ##徐 ##徑 ##徒 ##従 ##徕 ##得 ##徘 ##徙 ##徜 ##從 ##徠 ##御 ##徨 ##復 ##循 ##徬 ##微 ##徳 ##徴 ##徵 ##德 ##徹 ##徼 ##徽 ##心 ##必 ##忆 ##忌 ##忍 ##忏 ##忐 ##忑 ##忒 ##忖 ##志 ##忘 ##忙 ##応 ##忠 ##忡 ##忤 ##忧 ##忪 ##快 ##忱 ##念 ##忻 ##忽 ##忿 ##怀 ##态 ##怂 ##怅 ##怆 ##怎 ##怏 ##怒 ##怔 ##怕 ##怖 ##怙 ##怜 ##思 ##怠 ##怡 ##急 ##怦 ##性 ##怨 ##怪 ##怯 ##怵 ##总 ##怼 ##恁 ##恃 ##恆 ##恋 ##恍 ##恐 ##恒 ##恕 ##恙 ##恚 ##恢 ##恣 ##恤 ##恥 ##恨 ##恩 ##恪 ##恫 ##恬 ##恭 ##息 ##恰 ##恳 ##恵 ##恶 ##恸 ##恺 ##恻 ##恼 ##恿 ##悄 ##悅 ##悉 ##悌 ##悍 ##悔 ##悖 ##悚 ##悟 ##悠 ##患 ##悦 ##您 ##悩 ##悪 ##悬 ##悯 ##悱 ##悲 ##悴 ##悵 ##悶 ##悸 ##悻 ##悼 ##悽 ##情 ##惆 ##惇 ##惊 ##惋 ##惑 ##惕 ##惘 ##惚 ##惜 ##惟 ##惠 ##惡 ##惦 ##惧 ##惨 ##惩 ##惫 ##惬 ##惭 ##惮 ##惯 ##惰 ##惱 ##想 ##惴 ##惶 ##惹 ##惺 ##愁 ##愆 ##愈 ##愉 ##愍 ##意 ##愕 ##愚 ##愛 ##愜 ##感 ##愣 ##愤 ##愧 ##愫 ##愷 ##愿 ##慄 ##慈 ##態 ##慌 ##慎 ##慑 ##慕 ##慘 ##慚 ##慟 ##慢 ##慣 ##慧 ##慨 ##慫 ##慮 ##慰 ##慳 ##慵 ##慶 ##慷 ##慾 ##憂 ##憊 ##憋 ##憎 ##憐 ##憑 ##憔 ##憚 ##憤 ##憧 ##憨 ##憩 ##憫 ##憬 ##憲 ##憶 ##憾 ##懂 ##懇 ##懈 ##應 ##懊 ##懋 ##懑 ##懒 ##懦 ##懲 ##懵 ##懶 ##懷 ##懸 ##懺 ##懼 ##懾 ##懿 ##戀 ##戈 ##戊 ##戌 ##戍 ##戎 ##戏 ##成 ##我 ##戒 ##戕 ##或 ##战 ##戚 ##戛 ##戟 ##戡 ##戦 ##截 ##戬 ##戮 ##戰 ##戲 ##戳 ##戴 ##戶 ##户 ##戸 ##戻 ##戾 ##房 ##所 ##扁 ##扇 ##扈 ##扉 ##手 ##才 ##扎 ##扑 ##扒 ##打 ##扔 ##払 ##托 ##扛 ##扣 ##扦 ##执 ##扩 ##扪 ##扫 ##扬 ##扭 ##扮 ##扯 ##扰 ##扱 ##扳 ##扶 ##批 ##扼 ##找 ##承 ##技 ##抄 ##抉 ##把 ##抑 ##抒 ##抓 ##投 ##抖 ##抗 ##折 ##抚 ##抛 ##抜 ##択 ##抟 ##抠 ##抡 ##抢 ##护 ##报 ##抨 ##披 ##抬 ##抱 ##抵 ##抹 ##押 ##抽 ##抿 ##拂 ##拄 ##担 ##拆 ##拇 ##拈 ##拉 ##拋 ##拌 ##拍 ##拎 ##拐 ##拒 ##拓 ##拔 ##拖 ##拗 ##拘 ##拙 ##拚 ##招 ##拜 ##拟 ##拡 ##拢 ##拣 ##拥 ##拦 ##拧 ##拨 ##择 ##括 ##拭 ##拮 ##拯 ##拱 ##拳 ##拴 ##拷 ##拼 ##拽 ##拾 ##拿 ##持 ##挂 ##指 ##挈 ##按 ##挎 ##挑 ##挖 ##挙 ##挚 ##挛 ##挝 ##挞 ##挟 ##挠 ##挡 ##挣 ##挤 ##挥 ##挨 ##挪 ##挫 ##振 ##挲 ##挹 ##挺 ##挽 ##挾 ##捂 ##捅 ##捆 ##捉 ##捋 ##捌 ##捍 ##捎 ##捏 ##捐 ##捕 ##捞 ##损 ##捡 ##换 ##捣 ##捧 ##捨 ##捩 
##据 ##捱 ##捲 ##捶 ##捷 ##捺 ##捻 ##掀 ##掂 ##掃 ##掇 ##授 ##掉 ##掌 ##掏 ##掐 ##排 ##掖 ##掘 ##掙 ##掛 ##掠 ##採 ##探 ##掣 ##接 ##控 ##推 ##掩 ##措 ##掬 ##掰 ##掲 ##掳 ##掴 ##掷 ##掸 ##掺 ##揀 ##揃 ##揄 ##揆 ##揉 ##揍 ##描 ##提 ##插 ##揖 ##揚 ##換 ##握 ##揣 ##揩 ##揪 ##揭 ##揮 ##援 ##揶 ##揸 ##揹 ##揽 ##搀 ##搁 ##搂 ##搅 ##損 ##搏 ##搐 ##搓 ##搔 ##搖 ##搗 ##搜 ##搞 ##搡 ##搪 ##搬 ##搭 ##搵 ##搶 ##携 ##搽 ##摀 ##摁 ##摄 ##摆 ##摇 ##摈 ##摊 ##摒 ##摔 ##摘 ##摞 ##摟 ##摧 ##摩 ##摯 ##摳 ##摸 ##摹 ##摺 ##摻 ##撂 ##撃 ##撅 ##撇 ##撈 ##撐 ##撑 ##撒 ##撓 ##撕 ##撚 ##撞 ##撤 ##撥 ##撩 ##撫 ##撬 ##播 ##撮 ##撰 ##撲 ##撵 ##撷 ##撸 ##撻 ##撼 ##撿 ##擀 ##擁 ##擂 ##擄 ##擅 ##擇 ##擊 ##擋 ##操 ##擎 ##擒 ##擔 ##擘 ##據 ##擞 ##擠 ##擡 ##擢 ##擦 ##擬 ##擰 ##擱 ##擲 ##擴 ##擷 ##擺 ##擼 ##擾 ##攀 ##攏 ##攒 ##攔 ##攘 ##攙 ##攜 ##攝 ##攞 ##攢 ##攣 ##攤 ##攥 ##攪 ##攫 ##攬 ##支 ##收 ##攸 ##改 ##攻 ##放 ##政 ##故 ##效 ##敌 ##敍 ##敎 ##敏 ##救 ##敕 ##敖 ##敗 ##敘 ##教 ##敛 ##敝 ##敞 ##敢 ##散 ##敦 ##敬 ##数 ##敲 ##整 ##敵 ##敷 ##數 ##斂 ##斃 ##文 ##斋 ##斌 ##斎 ##斐 ##斑 ##斓 ##斗 ##料 ##斛 ##斜 ##斟 ##斡 ##斤 ##斥 ##斧 ##斩 ##斫 ##斬 ##断 ##斯 ##新 ##斷 ##方 ##於 ##施 ##旁 ##旃 ##旅 ##旋 ##旌 ##旎 ##族 ##旖 ##旗 ##无 ##既 ##日 ##旦 ##旧 ##旨 ##早 ##旬 ##旭 ##旮 ##旱 ##时 ##旷 ##旺 ##旻 ##昀 ##昂 ##昆 ##昇 ##昉 ##昊 ##昌 ##明 ##昏 ##易 ##昔 ##昕 ##昙 ##星 ##映 ##春 ##昧 ##昨 ##昭 ##是 ##昱 ##昴 ##昵 ##昶 ##昼 ##显 ##晁 ##時 ##晃 ##晉 ##晋 ##晌 ##晏 ##晒 ##晓 ##晔 ##晕 ##晖 ##晗 ##晚 ##晝 ##晞 ##晟 ##晤 ##晦 ##晨 ##晩 ##普 ##景 ##晰 ##晴 ##晶 ##晷 ##智 ##晾 ##暂 ##暄 ##暇 ##暈 ##暉 ##暌 ##暐 ##暑 ##暖 ##暗 ##暝 ##暢 ##暧 ##暨 ##暫 ##暮 ##暱 ##暴 ##暸 ##暹 ##曄 ##曆 ##曇 ##曉 ##曖 ##曙 ##曜 ##曝 ##曠 ##曦 ##曬 ##曰 ##曲 ##曳 ##更 ##書 ##曹 ##曼 ##曾 ##替 ##最 ##會 ##月 ##有 ##朋 ##服 ##朐 ##朔 ##朕 ##朗 ##望 ##朝 ##期 ##朦 ##朧 ##木 ##未 ##末 ##本 ##札 ##朮 ##术 ##朱 ##朴 ##朵 ##机 ##朽 ##杀 ##杂 ##权 ##杆 ##杈 ##杉 ##李 ##杏 ##材 ##村 ##杓 ##杖 ##杜 ##杞 ##束 ##杠 ##条 ##来 ##杨 ##杭 ##杯 ##杰 ##東 ##杳 ##杵 ##杷 ##杼 ##松 ##板 ##极 ##构 ##枇 ##枉 ##枋 ##析 ##枕 ##林 ##枚 ##果 ##枝 ##枢 ##枣 ##枪 ##枫 ##枭 ##枯 ##枰 ##枱 ##枳 ##架 ##枷 ##枸 ##柄 ##柏 ##某 ##柑 ##柒 ##染 ##柔 ##柘 ##柚 ##柜 ##柞 ##柠 ##柢 ##查 ##柩 ##柬 ##柯 ##柱 ##柳 ##柴 ##柵 ##査 ##柿 ##栀 ##栃 ##栄 ##栅 ##标 ##栈 ##栉 ##栋 ##栎 ##栏 ##树 ##栓 ##栖 ##栗 ##校 ##栩 ##株 ##样 ##核 ##根 ##格 ##栽 ##栾 ##桀 ##桁 ##桂 ##桃 ##桅 ##框 ##案 ##桉 ##桌 ##桎 ##桐 ##桑 ##桓 ##桔 ##桜 ##桠 ##桡 ##桢 ##档 ##桥 ##桦 ##桧 ##桨 
##桩 ##桶 ##桿 ##梁 ##梅 ##梆 ##梏 ##梓 ##梗 ##條 ##梟 ##梢 ##梦 ##梧 ##梨 ##梭 ##梯 ##械 ##梳 ##梵 ##梶 ##检 ##棂 ##棄 ##棉 ##棋 ##棍 ##棒 ##棕 ##棗 ##棘 ##棚 ##棟 ##棠 ##棣 ##棧 ##森 ##棱 ##棲 ##棵 ##棹 ##棺 ##椁 ##椅 ##椋 ##植 ##椎 ##椒 ##検 ##椪 ##椭 ##椰 ##椹 ##椽 ##椿 ##楂 ##楊 ##楓 ##楔 ##楚 ##楝 ##楞 ##楠 ##楣 ##楨 ##楫 ##業 ##楮 ##極 ##楷 ##楸 ##楹 ##楼 ##楽 ##概 ##榄 ##榆 ##榈 ##榉 ##榔 ##榕 ##榖 ##榛 ##榜 ##榨 ##榫 ##榭 ##榮 ##榱 ##榴 ##榷 ##榻 ##槁 ##槃 ##構 ##槌 ##槍 ##槎 ##槐 ##槓 ##様 ##槛 ##槟 ##槤 ##槭 ##槲 ##槳 ##槻 ##槽 ##槿 ##樁 ##樂 ##樊 ##樑 ##樓 ##標 ##樞 ##樟 ##模 ##樣 ##権 ##横 ##樫 ##樯 ##樱 ##樵 ##樸 ##樹 ##樺 ##樽 ##樾 ##橄 ##橇 ##橋 ##橐 ##橘 ##橙 ##機 ##橡 ##橢 ##橫 ##橱 ##橹 ##橼 ##檀 ##檄 ##檎 ##檐 ##檔 ##檗 ##檜 ##檢 ##檬 ##檯 ##檳 ##檸 ##檻 ##櫃 ##櫚 ##櫛 ##櫥 ##櫸 ##櫻 ##欄 ##權 ##欒 ##欖 ##欠 ##次 ##欢 ##欣 ##欧 ##欲 ##欸 ##欺 ##欽 ##款 ##歆 ##歇 ##歉 ##歌 ##歎 ##歐 ##歓 ##歙 ##歛 ##歡 ##止 ##正 ##此 ##步 ##武 ##歧 ##歩 ##歪 ##歯 ##歲 ##歳 ##歴 ##歷 ##歸 ##歹 ##死 ##歼 ##殁 ##殃 ##殆 ##殇 ##殉 ##殊 ##残 ##殒 ##殓 ##殖 ##殘 ##殞 ##殡 ##殤 ##殭 ##殯 ##殲 ##殴 ##段 ##殷 ##殺 ##殼 ##殿 ##毀 ##毁 ##毂 ##毅 ##毆 ##毋 ##母 ##毎 ##每 ##毒 ##毓 ##比 ##毕 ##毗 ##毘 ##毙 ##毛 ##毡 ##毫 ##毯 ##毽 ##氈 ##氏 ##氐 ##民 ##氓 ##气 ##氖 ##気 ##氙 ##氛 ##氟 ##氡 ##氢 ##氣 ##氤 ##氦 ##氧 ##氨 ##氪 ##氫 ##氮 ##氯 ##氰 ##氲 ##水 ##氷 ##永 ##氹 ##氾 ##汀 ##汁 ##求 ##汆 ##汇 ##汉 ##汎 ##汐 ##汕 ##汗 ##汙 ##汛 ##汝 ##汞 ##江 ##池 ##污 ##汤 ##汨 ##汩 ##汪 ##汰 ##汲 ##汴 ##汶 ##汹 ##決 ##汽 ##汾 ##沁 ##沂 ##沃 ##沅 ##沈 ##沉 ##沌 ##沏 ##沐 ##沒 ##沓 ##沖 ##沙 ##沛 ##沟 ##没 ##沢 ##沣 ##沥 ##沦 ##沧 ##沪 ##沫 ##沭 ##沮 ##沱 ##河 ##沸 ##油 ##治 ##沼 ##沽 ##沾 ##沿 ##況 ##泄 ##泉 ##泊 ##泌 ##泓 ##法 ##泗 ##泛 ##泞 ##泠 ##泡 ##波 ##泣 ##泥 ##注 ##泪 ##泫 ##泮 ##泯 ##泰 ##泱 ##泳 ##泵 ##泷 ##泸 ##泻 ##泼 ##泽 ##泾 ##洁 ##洄 ##洋 ##洒 ##洗 ##洙 ##洛 ##洞 ##津 ##洩 ##洪 ##洮 ##洱 ##洲 ##洵 ##洶 ##洸 ##洹 ##活 ##洼 ##洽 ##派 ##流 ##浃 ##浄 ##浅 ##浆 ##浇 ##浊 ##测 ##济 ##浏 ##浑 ##浒 ##浓 ##浔 ##浙 ##浚 ##浜 ##浣 ##浦 ##浩 ##浪 ##浬 ##浮 ##浯 ##浴 ##海 ##浸 ##涂 ##涅 ##涇 ##消 ##涉 ##涌 ##涎 ##涓 ##涔 ##涕 ##涙 ##涛 ##涝 ##涞 ##涟 ##涠 ##涡 ##涣 ##涤 ##润 ##涧 ##涨 ##涩 ##涪 ##涮 ##涯 ##液 ##涵 ##涸 ##涼 ##涿 ##淀 ##淄 ##淅 ##淆 ##淇 ##淋 ##淌 ##淑 ##淒 ##淖 ##淘 ##淙 ##淚 ##淞 ##淡 ##淤 ##淦 ##淨 ##淩 ##淪 ##淫 ##淬 ##淮 ##深 ##淳 ##淵 ##混 ##淹 ##淺 ##添 ##淼 ##清 ##済 ##渉 ##渊 ##渋 ##渍 ##渎 ##渐 ##渔 ##渗 ##渙 ##渚 ##減 ##渝 ##渠 ##渡 ##渣 ##渤 ##渥 
##渦 ##温 ##測 ##渭 ##港 ##渲 ##渴 ##游 ##渺 ##渾 ##湃 ##湄 ##湊 ##湍 ##湖 ##湘 ##湛 ##湟 ##湧 ##湫 ##湮 ##湯 ##湳 ##湾 ##湿 ##満 ##溃 ##溅 ##溉 ##溏 ##源 ##準 ##溜 ##溝 ##溟 ##溢 ##溥 ##溧 ##溪 ##溫 ##溯 ##溱 ##溴 ##溶 ##溺 ##溼 ##滁 ##滂 ##滄 ##滅 ##滇 ##滋 ##滌 ##滑 ##滓 ##滔 ##滕 ##滙 ##滚 ##滝 ##滞 ##滟 ##满 ##滢 ##滤 ##滥 ##滦 ##滨 ##滩 ##滬 ##滯 ##滲 ##滴 ##滷 ##滸 ##滾 ##滿 ##漁 ##漂 ##漆 ##漉 ##漏 ##漓 ##演 ##漕 ##漠 ##漢 ##漣 ##漩 ##漪 ##漫 ##漬 ##漯 ##漱 ##漲 ##漳 ##漸 ##漾 ##漿 ##潆 ##潇 ##潋 ##潍 ##潑 ##潔 ##潘 ##潛 ##潜 ##潞 ##潟 ##潢 ##潤 ##潦 ##潧 ##潭 ##潮 ##潰 ##潴 ##潸 ##潺 ##潼 ##澀 ##澄 ##澆 ##澈 ##澍 ##澎 ##澗 ##澜 ##澡 ##澤 ##澧 ##澱 ##澳 ##澹 ##激 ##濁 ##濂 ##濃 ##濑 ##濒 ##濕 ##濘 ##濛 ##濟 ##濠 ##濡 ##濤 ##濫 ##濬 ##濮 ##濯 ##濱 ##濺 ##濾 ##瀅 ##瀆 ##瀉 ##瀋 ##瀏 ##瀑 ##瀕 ##瀘 ##瀚 ##瀛 ##瀝 ##瀞 ##瀟 ##瀧 ##瀨 ##瀬 ##瀰 ##瀾 ##灌 ##灏 ##灑 ##灘 ##灝 ##灞 ##灣 ##火 ##灬 ##灭 ##灯 ##灰 ##灵 ##灶 ##灸 ##灼 ##災 ##灾 ##灿 ##炀 ##炁 ##炅 ##炉 ##炊 ##炎 ##炒 ##炔 ##炕 ##炖 ##炙 ##炜 ##炫 ##炬 ##炭 ##炮 ##炯 ##炳 ##炷 ##炸 ##点 ##為 ##炼 ##炽 ##烁 ##烂 ##烃 ##烈 ##烊 ##烏 ##烘 ##烙 ##烛 ##烟 ##烤 ##烦 ##烧 ##烨 ##烩 ##烫 ##烬 ##热 ##烯 ##烷 ##烹 ##烽 ##焉 ##焊 ##焕 ##焖 ##焗 ##焘 ##焙 ##焚 ##焜 ##無 ##焦 ##焯 ##焰 ##焱 ##然 ##焼 ##煅 ##煉 ##煊 ##煌 ##煎 ##煒 ##煖 ##煙 ##煜 ##煞 ##煤 ##煥 ##煦 ##照 ##煨 ##煩 ##煮 ##煲 ##煸 ##煽 ##熄 ##熊 ##熏 ##熒 ##熔 ##熙 ##熟 ##熠 ##熨 ##熬 ##熱 ##熵 ##熹 ##熾 ##燁 ##燃 ##燄 ##燈 ##燉 ##燊 ##燎 ##燒 ##燔 ##燕 ##燙 ##燜 ##營 ##燥 ##燦 ##燧 ##燭 ##燮 ##燴 ##燻 ##燼 ##燿 ##爆 ##爍 ##爐 ##爛 ##爪 ##爬 ##爭 ##爰 ##爱 ##爲 ##爵 ##父 ##爷 ##爸 ##爹 ##爺 ##爻 ##爽 ##爾 ##牆 ##片 ##版 ##牌 ##牍 ##牒 ##牙 ##牛 ##牝 ##牟 ##牠 ##牡 ##牢 ##牦 ##牧 ##物 ##牯 ##牲 ##牴 ##牵 ##特 ##牺 ##牽 ##犀 ##犁 ##犄 ##犊 ##犍 ##犒 ##犢 ##犧 ##犬 ##犯 ##状 ##犷 ##犸 ##犹 ##狀 ##狂 ##狄 ##狈 ##狎 ##狐 ##狒 ##狗 ##狙 ##狞 ##狠 ##狡 ##狩 ##独 ##狭 ##狮 ##狰 ##狱 ##狸 ##狹 ##狼 ##狽 ##猎 ##猕 ##猖 ##猗 ##猙 ##猛 ##猜 ##猝 ##猥 ##猩 ##猪 ##猫 ##猬 ##献 ##猴 ##猶 ##猷 ##猾 ##猿 ##獄 ##獅 ##獎 ##獐 ##獒 ##獗 ##獠 ##獣 ##獨 ##獭 ##獰 ##獲 ##獵 ##獷 ##獸 ##獺 ##獻 ##獼 ##獾 ##玄 ##率 ##玉 ##王 ##玑 ##玖 ##玛 ##玟 ##玠 ##玥 ##玩 ##玫 ##玮 ##环 ##现 ##玲 ##玳 ##玷 ##玺 ##玻 ##珀 ##珂 ##珅 ##珈 ##珉 ##珊 ##珍 ##珏 ##珐 ##珑 ##珙 ##珞 ##珠 ##珣 ##珥 ##珩 ##珪 ##班 ##珮 ##珲 ##珺 ##現 ##球 ##琅 ##理 ##琇 ##琉 ##琊 ##琍 ##琏 ##琐 ##琛 ##琢 ##琥 ##琦 ##琨 ##琪 ##琬 ##琮 ##琰 ##琲 ##琳 ##琴 ##琵 ##琶 ##琺 ##琼 ##瑀 ##瑁 ##瑄 ##瑋 ##瑕 ##瑗 ##瑙 
##瑚 ##瑛 ##瑜 ##瑞 ##瑟 ##瑠 ##瑣 ##瑤 ##瑩 ##瑪 ##瑯 ##瑰 ##瑶 ##瑾 ##璀 ##璁 ##璃 ##璇 ##璉 ##璋 ##璎 ##璐 ##璜 ##璞 ##璟 ##璧 ##璨 ##環 ##璽 ##璿 ##瓊 ##瓏 ##瓒 ##瓜 ##瓢 ##瓣 ##瓤 ##瓦 ##瓮 ##瓯 ##瓴 ##瓶 ##瓷 ##甄 ##甌 ##甕 ##甘 ##甙 ##甚 ##甜 ##生 ##產 ##産 ##甥 ##甦 ##用 ##甩 ##甫 ##甬 ##甭 ##甯 ##田 ##由 ##甲 ##申 ##电 ##男 ##甸 ##町 ##画 ##甾 ##畀 ##畅 ##界 ##畏 ##畑 ##畔 ##留 ##畜 ##畝 ##畢 ##略 ##畦 ##番 ##畫 ##異 ##畲 ##畳 ##畴 ##當 ##畸 ##畹 ##畿 ##疆 ##疇 ##疊 ##疏 ##疑 ##疔 ##疖 ##疗 ##疙 ##疚 ##疝 ##疟 ##疡 ##疣 ##疤 ##疥 ##疫 ##疮 ##疯 ##疱 ##疲 ##疳 ##疵 ##疸 ##疹 ##疼 ##疽 ##疾 ##痂 ##病 ##症 ##痈 ##痉 ##痊 ##痍 ##痒 ##痔 ##痕 ##痘 ##痙 ##痛 ##痞 ##痠 ##痢 ##痣 ##痤 ##痧 ##痨 ##痪 ##痫 ##痰 ##痱 ##痴 ##痹 ##痺 ##痼 ##痿 ##瘀 ##瘁 ##瘋 ##瘍 ##瘓 ##瘘 ##瘙 ##瘟 ##瘠 ##瘡 ##瘢 ##瘤 ##瘦 ##瘧 ##瘩 ##瘪 ##瘫 ##瘴 ##瘸 ##瘾 ##療 ##癇 ##癌 ##癒 ##癖 ##癜 ##癞 ##癡 ##癢 ##癣 ##癥 ##癫 ##癬 ##癮 ##癱 ##癲 ##癸 ##発 ##登 ##發 ##白 ##百 ##皂 ##的 ##皆 ##皇 ##皈 ##皋 ##皎 ##皑 ##皓 ##皖 ##皙 ##皚 ##皮 ##皰 ##皱 ##皴 ##皺 ##皿 ##盂 ##盃 ##盅 ##盆 ##盈 ##益 ##盎 ##盏 ##盐 ##监 ##盒 ##盔 ##盖 ##盗 ##盘 ##盛 ##盜 ##盞 ##盟 ##盡 ##監 ##盤 ##盥 ##盧 ##盪 ##目 ##盯 ##盱 ##盲 ##直 ##相 ##盹 ##盼 ##盾 ##省 ##眈 ##眉 ##看 ##県 ##眙 ##眞 ##真 ##眠 ##眦 ##眨 ##眩 ##眯 ##眶 ##眷 ##眸 ##眺 ##眼 ##眾 ##着 ##睁 ##睇 ##睏 ##睐 ##睑 ##睛 ##睜 ##睞 ##睡 ##睢 ##督 ##睥 ##睦 ##睨 ##睪 ##睫 ##睬 ##睹 ##睽 ##睾 ##睿 ##瞄 ##瞅 ##瞇 ##瞋 ##瞌 ##瞎 ##瞑 ##瞒 ##瞓 ##瞞 ##瞟 ##瞠 ##瞥 ##瞧 ##瞩 ##瞪 ##瞬 ##瞭 ##瞰 ##瞳 ##瞻 ##瞼 ##瞿 ##矇 ##矍 ##矗 ##矚 ##矛 ##矜 ##矢 ##矣 ##知 ##矩 ##矫 ##短 ##矮 ##矯 ##石 ##矶 ##矽 ##矾 ##矿 ##码 ##砂 ##砌 ##砍 ##砒 ##研 ##砖 ##砗 ##砚 ##砝 ##砣 ##砥 ##砧 ##砭 ##砰 ##砲 ##破 ##砷 ##砸 ##砺 ##砼 ##砾 ##础 ##硅 ##硐 ##硒 ##硕 ##硝 ##硫 ##硬 ##确 ##硯 ##硼 ##碁 ##碇 ##碉 ##碌 ##碍 ##碎 ##碑 ##碓 ##碗 ##碘 ##碚 ##碛 ##碟 ##碣 ##碧 ##碩 ##碰 ##碱 ##碳 ##碴 ##確 ##碼 ##碾 ##磁 ##磅 ##磊 ##磋 ##磐 ##磕 ##磚 ##磡 ##磨 ##磬 ##磯 ##磲 ##磷 ##磺 ##礁 ##礎 ##礙 ##礡 ##礦 ##礪 ##礫 ##礴 ##示 ##礼 ##社 ##祀 ##祁 ##祂 ##祇 ##祈 ##祉 ##祎 ##祐 ##祕 ##祖 ##祗 ##祚 ##祛 ##祜 ##祝 ##神 ##祟 ##祠 ##祢 ##祥 ##票 ##祭 ##祯 ##祷 ##祸 ##祺 ##祿 ##禀 ##禁 ##禄 ##禅 ##禍 ##禎 ##福 ##禛 ##禦 ##禧 ##禪 ##禮 ##禱 ##禹 ##禺 ##离 ##禽 ##禾 ##禿 ##秀 ##私 ##秃 ##秆 ##秉 ##秋 ##种 ##科 ##秒 ##秘 ##租 ##秣 ##秤 ##秦 ##秧 ##秩 ##秭 ##积 ##称 ##秸 ##移 ##秽 ##稀 ##稅 ##程 ##稍 ##税 ##稔 ##稗 ##稚 ##稜 ##稞 ##稟 ##稠 ##稣 ##種 ##稱 ##稲 ##稳 ##稷 ##稹 ##稻 ##稼 ##稽 ##稿 ##穀 
##穂 ##穆 ##穌 ##積 ##穎 ##穗 ##穢 ##穩 ##穫 ##穴 ##究 ##穷 ##穹 ##空 ##穿 ##突 ##窃 ##窄 ##窈 ##窍 ##窑 ##窒 ##窓 ##窕 ##窖 ##窗 ##窘 ##窜 ##窝 ##窟 ##窠 ##窥 ##窦 ##窨 ##窩 ##窪 ##窮 ##窯 ##窺 ##窿 ##竄 ##竅 ##竇 ##竊 ##立 ##竖 ##站 ##竜 ##竞 ##竟 ##章 ##竣 ##童 ##竭 ##端 ##競 ##竹 ##竺 ##竽 ##竿 ##笃 ##笆 ##笈 ##笋 ##笏 ##笑 ##笔 ##笙 ##笛 ##笞 ##笠 ##符 ##笨 ##第 ##笹 ##笺 ##笼 ##筆 ##等 ##筊 ##筋 ##筍 ##筏 ##筐 ##筑 ##筒 ##答 ##策 ##筛 ##筝 ##筠 ##筱 ##筲 ##筵 ##筷 ##筹 ##签 ##简 ##箇 ##箋 ##箍 ##箏 ##箐 ##箔 ##箕 ##算 ##箝 ##管 ##箩 ##箫 ##箭 ##箱 ##箴 ##箸 ##節 ##篁 ##範 ##篆 ##篇 ##築 ##篑 ##篓 ##篙 ##篝 ##篠 ##篡 ##篤 ##篩 ##篪 ##篮 ##篱 ##篷 ##簇 ##簌 ##簍 ##簡 ##簦 ##簧 ##簪 ##簫 ##簷 ##簸 ##簽 ##簾 ##簿 ##籁 ##籃 ##籌 ##籍 ##籐 ##籟 ##籠 ##籤 ##籬 ##籮 ##籲 ##米 ##类 ##籼 ##籽 ##粄 ##粉 ##粑 ##粒 ##粕 ##粗 ##粘 ##粟 ##粤 ##粥 ##粧 ##粪 ##粮 ##粱 ##粲 ##粳 ##粵 ##粹 ##粼 ##粽 ##精 ##粿 ##糅 ##糊 ##糍 ##糕 ##糖 ##糗 ##糙 ##糜 ##糞 ##糟 ##糠 ##糧 ##糬 ##糯 ##糰 ##糸 ##系 ##糾 ##紀 ##紂 ##約 ##紅 ##紉 ##紊 ##紋 ##納 ##紐 ##紓 ##純 ##紗 ##紘 ##紙 ##級 ##紛 ##紜 ##素 ##紡 ##索 ##紧 ##紫 ##紮 ##累 ##細 ##紳 ##紹 ##紺 ##終 ##絃 ##組 ##絆 ##経 ##結 ##絕 ##絞 ##絡 ##絢 ##給 ##絨 ##絮 ##統 ##絲 ##絳 ##絵 ##絶 ##絹 ##綁 ##綏 ##綑 ##經 ##継 ##続 ##綜 ##綠 ##綢 ##綦 ##綫 ##綬 ##維 ##綱 ##網 ##綴 ##綵 ##綸 ##綺 ##綻 ##綽 ##綾 ##綿 ##緊 ##緋 ##総 ##緑 ##緒 ##緘 ##線 ##緝 ##緞 ##締 ##緣 ##編 ##緩 ##緬 ##緯 ##練 ##緹 ##緻 ##縁 ##縄 ##縈 ##縛 ##縝 ##縣 ##縫 ##縮 ##縱 ##縴 ##縷 ##總 ##績 ##繁 ##繃 ##繆 ##繇 ##繋 ##織 ##繕 ##繚 ##繞 ##繡 ##繩 ##繪 ##繫 ##繭 ##繳 ##繹 ##繼 ##繽 ##纂 ##續 ##纍 ##纏 ##纓 ##纔 ##纖 ##纜 ##纠 ##红 ##纣 ##纤 ##约 ##级 ##纨 ##纪 ##纫 ##纬 ##纭 ##纯 ##纰 ##纱 ##纲 ##纳 ##纵 ##纶 ##纷 ##纸 ##纹 ##纺 ##纽 ##纾 ##线 ##绀 ##练 ##组 ##绅 ##细 ##织 ##终 ##绊 ##绍 ##绎 ##经 ##绑 ##绒 ##结 ##绔 ##绕 ##绘 ##给 ##绚 ##绛 ##络 ##绝 ##绞 ##统 ##绡 ##绢 ##绣 ##绥 ##绦 ##继 ##绩 ##绪 ##绫 ##续 ##绮 ##绯 ##绰 ##绳 ##维 ##绵 ##绶 ##绷 ##绸 ##绻 ##综 ##绽 ##绾 ##绿 ##缀 ##缄 ##缅 ##缆 ##缇 ##缈 ##缉 ##缎 ##缓 ##缔 ##缕 ##编 ##缘 ##缙 ##缚 ##缜 ##缝 ##缠 ##缢 ##缤 ##缥 ##缨 ##缩 ##缪 ##缭 ##缮 ##缰 ##缱 ##缴 ##缸 ##缺 ##缽 ##罂 ##罄 ##罌 ##罐 ##网 ##罔 ##罕 ##罗 ##罚 ##罡 ##罢 ##罩 ##罪 ##置 ##罰 ##署 ##罵 ##罷 ##罹 ##羁 ##羅 ##羈 ##羊 ##羌 ##美 ##羔 ##羚 ##羞 ##羟 ##羡 ##羣 ##群 ##羥 ##羧 ##羨 ##義 ##羯 ##羲 ##羸 ##羹 ##羽 ##羿 ##翁 ##翅 ##翊 ##翌 ##翎 ##習 ##翔 ##翘 ##翟 ##翠 ##翡 ##翦 ##翩 ##翰 ##翱 ##翳 ##翹 ##翻 ##翼 ##耀 ##老 ##考 ##耄 ##者 ##耆 ##耋 
##而 ##耍 ##耐 ##耒 ##耕 ##耗 ##耘 ##耙 ##耦 ##耨 ##耳 ##耶 ##耷 ##耸 ##耻 ##耽 ##耿 ##聂 ##聆 ##聊 ##聋 ##职 ##聒 ##联 ##聖 ##聘 ##聚 ##聞 ##聪 ##聯 ##聰 ##聲 ##聳 ##聴 ##聶 ##職 ##聽 ##聾 ##聿 ##肃 ##肄 ##肅 ##肆 ##肇 ##肉 ##肋 ##肌 ##肏 ##肓 ##肖 ##肘 ##肚 ##肛 ##肝 ##肠 ##股 ##肢 ##肤 ##肥 ##肩 ##肪 ##肮 ##肯 ##肱 ##育 ##肴 ##肺 ##肽 ##肾 ##肿 ##胀 ##胁 ##胃 ##胄 ##胆 ##背 ##胍 ##胎 ##胖 ##胚 ##胛 ##胜 ##胝 ##胞 ##胡 ##胤 ##胥 ##胧 ##胫 ##胭 ##胯 ##胰 ##胱 ##胳 ##胴 ##胶 ##胸 ##胺 ##能 ##脂 ##脅 ##脆 ##脇 ##脈 ##脉 ##脊 ##脍 ##脏 ##脐 ##脑 ##脓 ##脖 ##脘 ##脚 ##脛 ##脣 ##脩 ##脫 ##脯 ##脱 ##脲 ##脳 ##脸 ##脹 ##脾 ##腆 ##腈 ##腊 ##腋 ##腌 ##腎 ##腐 ##腑 ##腓 ##腔 ##腕 ##腥 ##腦 ##腩 ##腫 ##腭 ##腮 ##腰 ##腱 ##腳 ##腴 ##腸 ##腹 ##腺 ##腻 ##腼 ##腾 ##腿 ##膀 ##膈 ##膊 ##膏 ##膑 ##膘 ##膚 ##膛 ##膜 ##膝 ##膠 ##膦 ##膨 ##膩 ##膳 ##膺 ##膻 ##膽 ##膾 ##膿 ##臀 ##臂 ##臃 ##臆 ##臉 ##臊 ##臍 ##臓 ##臘 ##臟 ##臣 ##臥 ##臧 ##臨 ##自 ##臬 ##臭 ##至 ##致 ##臺 ##臻 ##臼 ##臾 ##舀 ##舂 ##舅 ##舆 ##與 ##興 ##舉 ##舊 ##舌 ##舍 ##舎 ##舐 ##舒 ##舔 ##舖 ##舗 ##舛 ##舜 ##舞 ##舟 ##航 ##舫 ##般 ##舰 ##舱 ##舵 ##舶 ##舷 ##舸 ##船 ##舺 ##舾 ##艇 ##艋 ##艘 ##艙 ##艦 ##艮 ##良 ##艰 ##艱 ##色 ##艳 ##艷 ##艹 ##艺 ##艾 ##节 ##芃 ##芈 ##芊 ##芋 ##芍 ##芎 ##芒 ##芙 ##芜 ##芝 ##芡 ##芥 ##芦 ##芩 ##芪 ##芫 ##芬 ##芭 ##芮 ##芯 ##花 ##芳 ##芷 ##芸 ##芹 ##芻 ##芽 ##芾 ##苁 ##苄 ##苇 ##苋 ##苍 ##苏 ##苑 ##苒 ##苓 ##苔 ##苕 ##苗 ##苛 ##苜 ##苞 ##苟 ##苡 ##苣 ##若 ##苦 ##苫 ##苯 ##英 ##苷 ##苹 ##苻 ##茁 ##茂 ##范 ##茄 ##茅 ##茉 ##茎 ##茏 ##茗 ##茜 ##茧 ##茨 ##茫 ##茬 ##茭 ##茯 ##茱 ##茲 ##茴 ##茵 ##茶 ##茸 ##茹 ##茼 ##荀 ##荃 ##荆 ##草 ##荊 ##荏 ##荐 ##荒 ##荔 ##荖 ##荘 ##荚 ##荞 ##荟 ##荠 ##荡 ##荣 ##荤 ##荥 ##荧 ##荨 ##荪 ##荫 ##药 ##荳 ##荷 ##荸 ##荻 ##荼 ##荽 ##莅 ##莆 ##莉 ##莊 ##莎 ##莒 ##莓 ##莖 ##莘 ##莞 ##莠 ##莢 ##莧 ##莪 ##莫 ##莱 ##莲 ##莴 ##获 ##莹 ##莺 ##莽 ##莿 ##菀 ##菁 ##菅 ##菇 ##菈 ##菊 ##菌 ##菏 ##菓 ##菖 ##菘 ##菜 ##菟 ##菠 ##菡 ##菩 ##華 ##菱 ##菲 ##菸 ##菽 ##萁 ##萃 ##萄 ##萊 ##萋 ##萌 ##萍 ##萎 ##萘 ##萝 ##萤 ##营 ##萦 ##萧 ##萨 ##萩 ##萬 ##萱 ##萵 ##萸 ##萼 ##落 ##葆 ##葉 ##著 ##葚 ##葛 ##葡 ##董 ##葦 ##葩 ##葫 ##葬 ##葭 ##葯 ##葱 ##葳 ##葵 ##葷 ##葺 ##蒂 ##蒋 ##蒐 ##蒔 ##蒙 ##蒜 ##蒞 ##蒟 ##蒡 ##蒨 ##蒲 ##蒸 ##蒹 ##蒻 ##蒼 ##蒿 ##蓁 ##蓄 ##蓆 ##蓉 ##蓋 ##蓑 ##蓓 ##蓖 ##蓝 ##蓟 ##蓦 ##蓬 ##蓮 ##蓼 ##蓿 ##蔑 ##蔓 ##蔔 ##蔗 ##蔘 ##蔚 ##蔡 ##蔣 ##蔥 ##蔫 ##蔬 ##蔭 ##蔵 ##蔷 ##蔺 ##蔻 ##蔼 ##蔽 ##蕁 ##蕃 ##蕈 ##蕉 ##蕊 ##蕎 ##蕙 ##蕤 ##蕨 ##蕩 ##蕪 ##蕭 ##蕲 ##蕴 ##蕻 
##蕾 ##薄 ##薅 ##薇 ##薈 ##薊 ##薏 ##薑 ##薔 ##薙 ##薛 ##薦 ##薨 ##薩 ##薪 ##薬 ##薯 ##薰 ##薹 ##藉 ##藍 ##藏 ##藐 ##藓 ##藕 ##藜 ##藝 ##藤 ##藥 ##藩 ##藹 ##藻 ##藿 ##蘆 ##蘇 ##蘊 ##蘋 ##蘑 ##蘚 ##蘭 ##蘸 ##蘼 ##蘿 ##虎 ##虏 ##虐 ##虑 ##虔 ##處 ##虚 ##虛 ##虜 ##虞 ##號 ##虢 ##虧 ##虫 ##虬 ##虱 ##虹 ##虻 ##虽 ##虾 ##蚀 ##蚁 ##蚂 ##蚊 ##蚌 ##蚓 ##蚕 ##蚜 ##蚝 ##蚣 ##蚤 ##蚩 ##蚪 ##蚯 ##蚱 ##蚵 ##蛀 ##蛆 ##蛇 ##蛊 ##蛋 ##蛎 ##蛐 ##蛔 ##蛙 ##蛛 ##蛟 ##蛤 ##蛭 ##蛮 ##蛰 ##蛳 ##蛹 ##蛻 ##蛾 ##蜀 ##蜂 ##蜃 ##蜆 ##蜇 ##蜈 ##蜊 ##蜍 ##蜒 ##蜓 ##蜕 ##蜗 ##蜘 ##蜚 ##蜜 ##蜡 ##蜢 ##蜥 ##蜱 ##蜴 ##蜷 ##蜻 ##蜿 ##蝇 ##蝈 ##蝉 ##蝌 ##蝎 ##蝕 ##蝗 ##蝙 ##蝟 ##蝠 ##蝦 ##蝨 ##蝴 ##蝶 ##蝸 ##蝼 ##螂 ##螃 ##融 ##螞 ##螢 ##螨 ##螯 ##螳 ##螺 ##蟀 ##蟄 ##蟆 ##蟋 ##蟎 ##蟑 ##蟒 ##蟠 ##蟬 ##蟲 ##蟹 ##蟻 ##蟾 ##蠅 ##蠍 ##蠔 ##蠕 ##蠛 ##蠟 ##蠡 ##蠢 ##蠣 ##蠱 ##蠶 ##蠹 ##蠻 ##血 ##衄 ##衅 ##衆 ##行 ##衍 ##術 ##衔 ##街 ##衙 ##衛 ##衝 ##衞 ##衡 ##衢 ##衣 ##补 ##表 ##衩 ##衫 ##衬 ##衮 ##衰 ##衲 ##衷 ##衹 ##衾 ##衿 ##袁 ##袂 ##袄 ##袅 ##袈 ##袋 ##袍 ##袒 ##袖 ##袜 ##袞 ##袤 ##袪 ##被 ##袭 ##袱 ##裁 ##裂 ##装 ##裆 ##裊 ##裏 ##裔 ##裕 ##裘 ##裙 ##補 ##裝 ##裟 ##裡 ##裤 ##裨 ##裱 ##裳 ##裴 ##裸 ##裹 ##製 ##裾 ##褂 ##複 ##褐 ##褒 ##褓 ##褔 ##褚 ##褥 ##褪 ##褫 ##褲 ##褶 ##褻 ##襁 ##襄 ##襟 ##襠 ##襪 ##襬 ##襯 ##襲 ##西 ##要 ##覃 ##覆 ##覇 ##見 ##規 ##覓 ##視 ##覚 ##覦 ##覧 ##親 ##覬 ##観 ##覷 ##覺 ##覽 ##觀 ##见 ##观 ##规 ##觅 ##视 ##览 ##觉 ##觊 ##觎 ##觐 ##觑 ##角 ##觞 ##解 ##觥 ##触 ##觸 ##言 ##訂 ##計 ##訊 ##討 ##訓 ##訕 ##訖 ##託 ##記 ##訛 ##訝 ##訟 ##訣 ##訥 ##訪 ##設 ##許 ##訳 ##訴 ##訶 ##診 ##註 ##証 ##詆 ##詐 ##詔 ##評 ##詛 ##詞 ##詠 ##詡 ##詢 ##詣 ##試 ##詩 ##詫 ##詬 ##詭 ##詮 ##詰 ##話 ##該 ##詳 ##詹 ##詼 ##誅 ##誇 ##誉 ##誌 ##認 ##誓 ##誕 ##誘 ##語 ##誠 ##誡 ##誣 ##誤 ##誥 ##誦 ##誨 ##說 ##説 ##読 ##誰 ##課 ##誹 ##誼 ##調 ##諄 ##談 ##請 ##諏 ##諒 ##論 ##諗 ##諜 ##諡 ##諦 ##諧 ##諫 ##諭 ##諮 ##諱 ##諳 ##諷 ##諸 ##諺 ##諾 ##謀 ##謁 ##謂 ##謄 ##謊 ##謎 ##謐 ##謔 ##謗 ##謙 ##講 ##謝 ##謠 ##謨 ##謬 ##謹 ##謾 ##譁 ##證 ##譎 ##譏 ##識 ##譙 ##譚 ##譜 ##警 ##譬 ##譯 ##議 ##譲 ##譴 ##護 ##譽 ##讀 ##變 ##讓 ##讚 ##讞 ##计 ##订 ##认 ##讥 ##讧 ##讨 ##让 ##讪 ##讫 ##训 ##议 ##讯 ##记 ##讲 ##讳 ##讴 ##讶 ##讷 ##许 ##讹 ##论 ##讼 ##讽 ##设 ##访 ##诀 ##证 ##诃 ##评 ##诅 ##识 ##诈 ##诉 ##诊 ##诋 ##词 ##诏 ##译 ##试 ##诗 ##诘 ##诙 ##诚 ##诛 ##话 ##诞 ##诟 ##诠 ##诡 ##询 ##诣 ##诤 ##该 ##详 ##诧 ##诩 ##诫 ##诬 ##语 ##误 ##诰 ##诱 ##诲 ##说 ##诵 ##诶 ##请 ##诸 ##诺 ##读 ##诽 ##课 ##诿 ##谀 ##谁 ##调 
##谄 ##谅 ##谆 ##谈 ##谊 ##谋 ##谌 ##谍 ##谎 ##谏 ##谐 ##谑 ##谒 ##谓 ##谔 ##谕 ##谗 ##谘 ##谙 ##谚 ##谛 ##谜 ##谟 ##谢 ##谣 ##谤 ##谥 ##谦 ##谧 ##谨 ##谩 ##谪 ##谬 ##谭 ##谯 ##谱 ##谲 ##谴 ##谶 ##谷 ##豁 ##豆 ##豇 ##豈 ##豉 ##豊 ##豌 ##豎 ##豐 ##豔 ##豚 ##象 ##豢 ##豪 ##豫 ##豬 ##豹 ##豺 ##貂 ##貅 ##貌 ##貓 ##貔 ##貘 ##貝 ##貞 ##負 ##財 ##貢 ##貧 ##貨 ##販 ##貪 ##貫 ##責 ##貯 ##貰 ##貳 ##貴 ##貶 ##買 ##貸 ##費 ##貼 ##貽 ##貿 ##賀 ##賁 ##賂 ##賃 ##賄 ##資 ##賈 ##賊 ##賑 ##賓 ##賜 ##賞 ##賠 ##賡 ##賢 ##賣 ##賤 ##賦 ##質 ##賬 ##賭 ##賴 ##賺 ##購 ##賽 ##贅 ##贈 ##贊 ##贍 ##贏 ##贓 ##贖 ##贛 ##贝 ##贞 ##负 ##贡 ##财 ##责 ##贤 ##败 ##账 ##货 ##质 ##贩 ##贪 ##贫 ##贬 ##购 ##贮 ##贯 ##贰 ##贱 ##贲 ##贴 ##贵 ##贷 ##贸 ##费 ##贺 ##贻 ##贼 ##贾 ##贿 ##赁 ##赂 ##赃 ##资 ##赅 ##赈 ##赊 ##赋 ##赌 ##赎 ##赏 ##赐 ##赓 ##赔 ##赖 ##赘 ##赚 ##赛 ##赝 ##赞 ##赠 ##赡 ##赢 ##赣 ##赤 ##赦 ##赧 ##赫 ##赭 ##走 ##赳 ##赴 ##赵 ##赶 ##起 ##趁 ##超 ##越 ##趋 ##趕 ##趙 ##趟 ##趣 ##趨 ##足 ##趴 ##趵 ##趸 ##趺 ##趾 ##跃 ##跄 ##跆 ##跋 ##跌 ##跎 ##跑 ##跖 ##跚 ##跛 ##距 ##跟 ##跡 ##跤 ##跨 ##跩 ##跪 ##路 ##跳 ##践 ##跷 ##跹 ##跺 ##跻 ##踉 ##踊 ##踌 ##踏 ##踐 ##踝 ##踞 ##踟 ##踢 ##踩 ##踪 ##踮 ##踱 ##踴 ##踵 ##踹 ##蹂 ##蹄 ##蹇 ##蹈 ##蹉 ##蹊 ##蹋 ##蹑 ##蹒 ##蹙 ##蹟 ##蹣 ##蹤 ##蹦 ##蹩 ##蹬 ##蹭 ##蹲 ##蹴 ##蹶 ##蹺 ##蹼 ##蹿 ##躁 ##躇 ##躉 ##躊 ##躋 ##躍 ##躏 ##躪 ##身 ##躬 ##躯 ##躲 ##躺 ##軀 ##車 ##軋 ##軌 ##軍 ##軒 ##軟 ##転 ##軸 ##軼 ##軽 ##軾 ##較 ##載 ##輒 ##輓 ##輔 ##輕 ##輛 ##輝 ##輟 ##輩 ##輪 ##輯 ##輸 ##輻 ##輾 ##輿 ##轄 ##轅 ##轆 ##轉 ##轍 ##轎 ##轟 ##车 ##轧 ##轨 ##轩 ##转 ##轭 ##轮 ##软 ##轰 ##轲 ##轴 ##轶 ##轻 ##轼 ##载 ##轿 ##较 ##辄 ##辅 ##辆 ##辇 ##辈 ##辉 ##辊 ##辍 ##辐 ##辑 ##输 ##辕 ##辖 ##辗 ##辘 ##辙 ##辛 ##辜 ##辞 ##辟 ##辣 ##辦 ##辨 ##辩 ##辫 ##辭 ##辮 ##辯 ##辰 ##辱 ##農 ##边 ##辺 ##辻 ##込 ##辽 ##达 ##迁 ##迂 ##迄 ##迅 ##过 ##迈 ##迎 ##运 ##近 ##返 ##还 ##这 ##进 ##远 ##违 ##连 ##迟 ##迢 ##迤 ##迥 ##迦 ##迩 ##迪 ##迫 ##迭 ##述 ##迴 ##迷 ##迸 ##迹 ##迺 ##追 ##退 ##送 ##适 ##逃 ##逅 ##逆 ##选 ##逊 ##逍 ##透 ##逐 ##递 ##途 ##逕 ##逗 ##這 ##通 ##逛 ##逝 ##逞 ##速 ##造 ##逢 ##連 ##逮 ##週 ##進 ##逵 ##逶 ##逸 ##逻 ##逼 ##逾 ##遁 ##遂 ##遅 ##遇 ##遊 ##運 ##遍 ##過 ##遏 ##遐 ##遑 ##遒 ##道 ##達 ##違 ##遗 ##遙 ##遛 ##遜 ##遞 ##遠 ##遢 ##遣 ##遥 ##遨 ##適 ##遭 ##遮 ##遲 ##遴 ##遵 ##遶 ##遷 ##選 ##遺 ##遼 ##遽 ##避 ##邀 ##邁 ##邂 ##邃 ##還 ##邇 ##邈 ##邊 ##邋 ##邏 ##邑 ##邓 ##邕 ##邛 ##邝 ##邢 ##那 ##邦 ##邨 ##邪 ##邬 ##邮 ##邯 ##邰 ##邱 ##邳 ##邵 ##邸 ##邹 ##邺 ##邻 ##郁 
##郅 ##郊 ##郎 ##郑 ##郜 ##郝 ##郡 ##郢 ##郤 ##郦 ##郧 ##部 ##郫 ##郭 ##郴 ##郵 ##郷 ##郸 ##都 ##鄂 ##鄉 ##鄒 ##鄔 ##鄙 ##鄞 ##鄢 ##鄧 ##鄭 ##鄰 ##鄱 ##鄲 ##鄺 ##酉 ##酊 ##酋 ##酌 ##配 ##酐 ##酒 ##酗 ##酚 ##酝 ##酢 ##酣 ##酥 ##酩 ##酪 ##酬 ##酮 ##酯 ##酰 ##酱 ##酵 ##酶 ##酷 ##酸 ##酿 ##醃 ##醇 ##醉 ##醋 ##醍 ##醐 ##醒 ##醚 ##醛 ##醜 ##醞 ##醣 ##醪 ##醫 ##醬 ##醮 ##醯 ##醴 ##醺 ##釀 ##釁 ##采 ##釉 ##释 ##釋 ##里 ##重 ##野 ##量 ##釐 ##金 ##釗 ##釘 ##釜 ##針 ##釣 ##釦 ##釧 ##釵 ##鈀 ##鈉 ##鈍 ##鈎 ##鈔 ##鈕 ##鈞 ##鈣 ##鈦 ##鈪 ##鈴 ##鈺 ##鈾 ##鉀 ##鉄 ##鉅 ##鉉 ##鉑 ##鉗 ##鉚 ##鉛 ##鉤 ##鉴 ##鉻 ##銀 ##銃 ##銅 ##銑 ##銓 ##銖 ##銘 ##銜 ##銬 ##銭 ##銮 ##銳 ##銷 ##銹 ##鋁 ##鋅 ##鋒 ##鋤 ##鋪 ##鋰 ##鋸 ##鋼 ##錄 ##錐 ##錘 ##錚 ##錠 ##錢 ##錦 ##錨 ##錫 ##錮 ##錯 ##録 ##錳 ##錶 ##鍊 ##鍋 ##鍍 ##鍛 ##鍥 ##鍰 ##鍵 ##鍺 ##鍾 ##鎂 ##鎊 ##鎌 ##鎏 ##鎔 ##鎖 ##鎗 ##鎚 ##鎧 ##鎬 ##鎮 ##鎳 ##鏈 ##鏖 ##鏗 ##鏘 ##鏞 ##鏟 ##鏡 ##鏢 ##鏤 ##鏽 ##鐘 ##鐮 ##鐲 ##鐳 ##鐵 ##鐸 ##鐺 ##鑄 ##鑊 ##鑑 ##鑒 ##鑣 ##鑫 ##鑰 ##鑲 ##鑼 ##鑽 ##鑾 ##鑿 ##针 ##钉 ##钊 ##钎 ##钏 ##钒 ##钓 ##钗 ##钙 ##钛 ##钜 ##钝 ##钞 ##钟 ##钠 ##钡 ##钢 ##钣 ##钤 ##钥 ##钦 ##钧 ##钨 ##钩 ##钮 ##钯 ##钰 ##钱 ##钳 ##钴 ##钵 ##钺 ##钻 ##钼 ##钾 ##钿 ##铀 ##铁 ##铂 ##铃 ##铄 ##铅 ##铆 ##铉 ##铎 ##铐 ##铛 ##铜 ##铝 ##铠 ##铡 ##铢 ##铣 ##铤 ##铨 ##铩 ##铬 ##铭 ##铮 ##铰 ##铲 ##铵 ##银 ##铸 ##铺 ##链 ##铿 ##销 ##锁 ##锂 ##锄 ##锅 ##锆 ##锈 ##锉 ##锋 ##锌 ##锏 ##锐 ##锑 ##错 ##锚 ##锟 ##锡 ##锢 ##锣 ##锤 ##锥 ##锦 ##锭 ##键 ##锯 ##锰 ##锲 ##锵 ##锹 ##锺 ##锻 ##镀 ##镁 ##镂 ##镇 ##镉 ##镌 ##镍 ##镐 ##镑 ##镕 ##镖 ##镗 ##镛 ##镜 ##镣 ##镭 ##镯 ##镰 ##镳 ##镶 ##長 ##长 ##門 ##閃 ##閉 ##開 ##閎 ##閏 ##閑 ##閒 ##間 ##閔 ##閘 ##閡 ##関 ##閣 ##閥 ##閨 ##閩 ##閱 ##閲 ##閹 ##閻 ##閾 ##闆 ##闇 ##闊 ##闌 ##闍 ##闔 ##闕 ##闖 ##闘 ##關 ##闡 ##闢 ##门 ##闪 ##闫 ##闭 ##问 ##闯 ##闰 ##闲 ##间 ##闵 ##闷 ##闸 ##闹 ##闺 ##闻 ##闽 ##闾 ##阀 ##阁 ##阂 ##阅 ##阆 ##阇 ##阈 ##阉 ##阎 ##阐 ##阑 ##阔 ##阕 ##阖 ##阙 ##阚 ##阜 ##队 ##阡 ##阪 ##阮 ##阱 ##防 ##阳 ##阴 ##阵 ##阶 ##阻 ##阿 ##陀 ##陂 ##附 ##际 ##陆 ##陇 ##陈 ##陋 ##陌 ##降 ##限 ##陕 ##陛 ##陝 ##陞 ##陟 ##陡 ##院 ##陣 ##除 ##陨 ##险 ##陪 ##陰 ##陲 ##陳 ##陵 ##陶 ##陷 ##陸 ##険 ##陽 ##隅 ##隆 ##隈 ##隊 ##隋 ##隍 ##階 ##随 ##隐 ##隔 ##隕 ##隘 ##隙 ##際 ##障 ##隠 ##隣 ##隧 ##隨 ##險 ##隱 ##隴 ##隶 ##隸 ##隻 ##隼 ##隽 ##难 ##雀 ##雁 ##雄 ##雅 ##集 ##雇 ##雉 ##雋 ##雌 ##雍 ##雎 ##雏 ##雑 ##雒 ##雕 ##雖 ##雙 ##雛 ##雜 ##雞 ##離 ##難 ##雨 ##雪 ##雯 ##雰 ##雲 ##雳 ##零 ##雷 ##雹 ##電 ##雾 ##需 
##霁 ##霄 ##霆 ##震 ##霈 ##霉 ##霊 ##霍 ##霎 ##霏 ##霑 ##霓 ##霖 ##霜 ##霞 ##霧 ##霭 ##霰 ##露 ##霸 ##霹 ##霽 ##霾 ##靂 ##靄 ##靈 ##青 ##靓 ##靖 ##静 ##靚 ##靛 ##靜 ##非 ##靠 ##靡 ##面 ##靥 ##靦 ##革 ##靳 ##靴 ##靶 ##靼 ##鞅 ##鞋 ##鞍 ##鞏 ##鞑 ##鞘 ##鞠 ##鞣 ##鞦 ##鞭 ##韆 ##韋 ##韌 ##韓 ##韜 ##韦 ##韧 ##韩 ##韬 ##韭 ##音 ##韵 ##韶 ##韻 ##響 ##頁 ##頂 ##頃 ##項 ##順 ##須 ##頌 ##預 ##頑 ##頒 ##頓 ##頗 ##領 ##頜 ##頡 ##頤 ##頫 ##頭 ##頰 ##頷 ##頸 ##頹 ##頻 ##頼 ##顆 ##題 ##額 ##顎 ##顏 ##顔 ##願 ##顛 ##類 ##顧 ##顫 ##顯 ##顱 ##顴 ##页 ##顶 ##顷 ##项 ##顺 ##须 ##顼 ##顽 ##顾 ##顿 ##颁 ##颂 ##预 ##颅 ##领 ##颇 ##颈 ##颉 ##颊 ##颌 ##颍 ##颐 ##频 ##颓 ##颔 ##颖 ##颗 ##题 ##颚 ##颛 ##颜 ##额 ##颞 ##颠 ##颡 ##颢 ##颤 ##颦 ##颧 ##風 ##颯 ##颱 ##颳 ##颶 ##颼 ##飄 ##飆 ##风 ##飒 ##飓 ##飕 ##飘 ##飙 ##飚 ##飛 ##飞 ##食 ##飢 ##飨 ##飩 ##飪 ##飯 ##飲 ##飼 ##飽 ##飾 ##餃 ##餅 ##餉 ##養 ##餌 ##餐 ##餒 ##餓 ##餘 ##餚 ##餛 ##餞 ##餡 ##館 ##餮 ##餵 ##餾 ##饅 ##饈 ##饋 ##饌 ##饍 ##饑 ##饒 ##饕 ##饗 ##饞 ##饥 ##饨 ##饪 ##饬 ##饭 ##饮 ##饯 ##饰 ##饱 ##饲 ##饴 ##饵 ##饶 ##饷 ##饺 ##饼 ##饽 ##饿 ##馀 ##馁 ##馄 ##馅 ##馆 ##馈 ##馋 ##馍 ##馏 ##馒 ##馔 ##首 ##馗 ##香 ##馥 ##馨 ##馬 ##馭 ##馮 ##馳 ##馴 ##駁 ##駄 ##駅 ##駆 ##駐 ##駒 ##駕 ##駛 ##駝 ##駭 ##駱 ##駿 ##騁 ##騎 ##騏 ##験 ##騙 ##騨 ##騰 ##騷 ##驀 ##驅 ##驊 ##驍 ##驒 ##驕 ##驗 ##驚 ##驛 ##驟 ##驢 ##驥 ##马 ##驭 ##驮 ##驯 ##驰 ##驱 ##驳 ##驴 ##驶 ##驷 ##驸 ##驹 ##驻 ##驼 ##驾 ##驿 ##骁 ##骂 ##骄 ##骅 ##骆 ##骇 ##骈 ##骊 ##骋 ##验 ##骏 ##骐 ##骑 ##骗 ##骚 ##骛 ##骜 ##骞 ##骠 ##骡 ##骤 ##骥 ##骧 ##骨 ##骯 ##骰 ##骶 ##骷 ##骸 ##骼 ##髂 ##髅 ##髋 ##髏 ##髒 ##髓 ##體 ##髖 ##高 ##髦 ##髪 ##髮 ##髯 ##髻 ##鬃 ##鬆 ##鬍 ##鬓 ##鬚 ##鬟 ##鬢 ##鬣 ##鬥 ##鬧 ##鬱 ##鬼 ##魁 ##魂 ##魄 ##魅 ##魇 ##魍 ##魏 ##魔 ##魘 ##魚 ##魯 ##魷 ##鮑 ##鮨 ##鮪 ##鮭 ##鮮 ##鯉 ##鯊 ##鯖 ##鯛 ##鯨 ##鯰 ##鯽 ##鰍 ##鰓 ##鰭 ##鰲 ##鰻 ##鰾 ##鱈 ##鱉 ##鱔 ##鱗 ##鱷 ##鱸 ##鱼 ##鱿 ##鲁 ##鲈 ##鲍 ##鲑 ##鲛 ##鲜 ##鲟 ##鲢 ##鲤 ##鲨 ##鲫 ##鲱 ##鲲 ##鲶 ##鲷 ##鲸 ##鳃 ##鳄 ##鳅 ##鳌 ##鳍 ##鳕 ##鳖 ##鳗 ##鳝 ##鳞 ##鳥 ##鳩 ##鳳 ##鳴 ##鳶 ##鴉 ##鴕 ##鴛 ##鴦 ##鴨 ##鴻 ##鴿 ##鵑 ##鵜 ##鵝 ##鵡 ##鵬 ##鵰 ##鵲 ##鶘 ##鶩 ##鶯 ##鶴 ##鷗 ##鷲 ##鷹 ##鷺 ##鸚 ##鸞 ##鸟 ##鸠 ##鸡 ##鸢 ##鸣 ##鸥 ##鸦 ##鸨 ##鸪 ##鸭 ##鸯 ##鸳 ##鸵 ##鸽 ##鸾 ##鸿 ##鹂 ##鹃 ##鹄 ##鹅 ##鹈 ##鹉 ##鹊 ##鹌 ##鹏 ##鹑 ##鹕 ##鹘 ##鹜 ##鹞 ##鹤 ##鹦 ##鹧 ##鹫 ##鹭 ##鹰 ##鹳 ##鹵 ##鹹 ##鹼 ##鹽 ##鹿 ##麂 ##麋 ##麒 ##麓 ##麗 ##麝 ##麟 ##麥 ##麦 ##麩 ##麴 ##麵 ##麸 ##麺 ##麻 ##麼 ##麽 ##麾 ##黃 ##黄 ##黍 ##黎 
class Bert_summary_model(object):
    """Extractive summarizer that scores sentences with a BERT classifier.

    The instance owns a ``BertData`` pre-processor and a ``Summarizer`` model
    restored from the checkpoint named by ``config.load_from``.  ``predict``
    summarizes a document that fits in a single model pass; ``long_predict``
    splits a longer document, summarizes each chunk and then condenses the
    concatenated chunk summaries.
    """

    def __init__(self, device=torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")):
        """Build the pre-processor and load the model onto ``device``."""
        self.device = device
        self.data_process = BertData(vocab_path=vocab_path, device=device)
        self.model = self.load_model(load_from)
        # Longest text handled in one pass: the position-embedding budget
        # minus 2 (presumably the [CLS]/[SEP] slots -- confirm with BertData).
        self.max_process_len = self.model.bert_config.max_position_embeddings - 2

    def load_model(self, load_from):
        """Restore a ``Summarizer`` from the checkpoint at ``load_from``.

        Returns:
            The model in eval mode.
        """
        # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(load_from, map_location=lambda storage, loc: storage)
        print('loading....', load_from)
        model = Summarizer(self.device, bert_config_path=bert_config_path)
        model.load_cp(checkpoint)
        model.eval()
        return model

    def save(self):
        """Write the model weights to ``models/bert_classifier/model_s.pt``.

        An existing checkpoint is never overwritten.

        Returns:
            ``(checkpoint, checkpoint_path)`` when a file was written,
            ``None`` when the checkpoint already existed.
        """
        checkpoint = {
            'model': self.model.state_dict(),
        }
        checkpoint_dir = 'models/bert_classifier'
        checkpoint_path = os.path.join(checkpoint_dir, 'model_s.pt')
        if os.path.exists(checkpoint_path):
            return None
        # torch.save does not create missing parent directories.
        os.makedirs(checkpoint_dir, exist_ok=True)
        torch.save(checkpoint, checkpoint_path)
        # Report only after the file has really been written.
        print('saved:', checkpoint_path)
        return checkpoint, checkpoint_path

    def long_predict(self, document: str, max_summary_size=max_summary_size, min_sent_num=3):
        """Summarize a document longer than ``self.max_process_len``.

        Args:
            document: text to summarize; must be longer than
                ``self.max_process_len`` characters.
            max_summary_size: upper bound used when selecting/truncating text.
            min_sent_num: sentence-count threshold forwarded to the
                pre-processor and to ``predict``.

        Returns:
            The summary string.

        Raises:
            AssertionError: if ``document`` is not longer than
                ``self.max_process_len``.
        """
        assert len(document) > self.max_process_len, '不够长'
        # Cut the over-long document into chunks and summarize each chunk
        # with the caller's settings.
        chunks = self.data_process.split_long_doc(document, self.max_process_len)
        rt = ''.join(
            self.predict(document=chunk, max_summary_size=max_summary_size, min_sent_num=min_sent_num)
            for chunk in chunks
        )

        # Re-split the merged chunk summaries to see whether they still need condensing.
        example, document_splits = self.data_process.preprocess(rt, min_sent_num=min_sent_num)
        still_too_long = len(rt) > self.max_process_len

        if still_too_long and len(document_splits) <= min_sent_num:
            # Too long but too few sentences for the model: build the result
            # by plain concatenation and truncation.
            first = document_splits[0]
            if len(first) > max_summary_size:
                # Even the first sentence exceeds the budget: add its
                # comma-separated pieces until the budget is reached.
                txt = ''
                for piece in first.split(','):
                    txt += piece
                    if len(txt) >= max_summary_size:
                        txt = txt[:max_summary_size]
                        break
            else:
                txt = first
                for sent in document_splits[1:]:
                    txt += sent
                    if len(txt) >= self.max_process_len:
                        txt = txt[:max_summary_size]
                        break
            rt = txt
        elif still_too_long and len(document_splits) > min_sent_num:
            # Still qualifies as a long document: recurse, keeping the
            # caller's settings instead of falling back to the defaults.
            rt = self.long_predict(rt, max_summary_size, min_sent_num)
        else:
            # Short enough for a single model pass.
            rt = self.predict(rt, max_summary_size, min_sent_num)
        return rt

    def predict(self, document: str, max_summary_size=max_summary_size, min_sent_num=3):
        """Summarize ``document`` with a single model pass.

        Args:
            document: text to summarize.
            max_summary_size: documents no longer than this are returned
                unchanged; otherwise selected sentences are kept below it.
            min_sent_num: sentence-count threshold below which the model is
                skipped and the text is simply truncated.

        Returns:
            The summary string, with selected sentences in document order.
        """
        # Already short enough: nothing to summarize.
        if len(document) <= max_summary_size:
            return document

        # Split into sentences.  Per the original author's note, `example`
        # comes back as None when there are too few sentences.
        example, doc_sents = self.data_process.preprocess(document, min_sent_num=min_sent_num)
        if example is None or (len(document) > self.max_process_len) or len(doc_sents) <= min_sent_num:
            # Too few sentences or too long for one pass: truncate instead of predicting.
            return ''.join(doc_sents)[:max_summary_size]

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            o_sent_scores, _ = self.model(example.src,
                                          example.segs,
                                          example.clss,
                                          example.src_mask,
                                          example.cls_mask)
        scores = o_sent_scores.cpu().detach().numpy()
        # Sentence indices of the first (only) batch row, best score first.
        ranked = scores.argsort().tolist()[0][::-1]

        summary_idx = []
        used = 0
        for idx in ranked:
            sent_len = len(doc_sents[idx])
            if used + sent_len < max_summary_size:
                summary_idx.append(idx)
                used += sent_len

        if not summary_idx and ranked:
            # No single sentence fits the budget: return the best sentence
            # truncated rather than an empty summary.
            return doc_sents[ranked[0]][:max_summary_size]

        # Emit the chosen sentences in their original document order.
        return ''.join(doc_sents[i] for i in sorted(summary_idx))
class TransformerEncoderLayer(nn.Module):
    """One self-attention + feed-forward layer of the inter-sentence encoder.

    Args:
        d_model: feature size of the layer input and output.
        heads: number of attention heads.
        d_ff: hidden size of the position-wise feed-forward network.
        dropout: dropout probability used by the attention, the residual
            branch and the feed-forward network.
    """

    def __init__(self, d_model, heads, d_ff, dropout):
        super().__init__()
        self.self_attn = MultiHeadedAttention(heads, d_model, dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
        self.dropout = nn.Dropout(dropout)

    def forward(self, iter, query, inputs, mask):
        """Run self-attention over ``inputs`` followed by the feed-forward net.

        Args:
            iter: index of this layer in the stack; layer 0 skips the
                pre-attention layer norm.
            query: accepted for interface compatibility; not used.
            inputs: tensor fed to self-attention (as key, value and query).
            mask: attention mask; a singleton dimension is inserted at
                position 1 before it is handed to the attention module.

        Returns:
            Output of the feed-forward block applied to the attention residual.
        """
        # Every layer except the first normalises its input before attending.
        normed = inputs if iter == 0 else self.layer_norm(inputs)
        attn_mask = mask.unsqueeze(1)
        attended = self.self_attn(normed, normed, normed, mask=attn_mask)
        # Residual connection around the attention block.
        residual = self.dropout(attended) + inputs
        return self.feed_forward(residual)
class Bert(nn.Module):
    """Wrapper that runs a ``BertModel`` and returns its token-level output.

    The backbone is constructed directly from ``bert_config``; no pretrained
    weights are loaded here.
    """

    def __init__(self, bert_config):
        super(Bert, self).__init__()
        self.model = BertModel(bert_config)

    def forward(self, x, segs, mask):
        """Encode a batch and return the first output of the backbone.

        Args:
            x: passed to the backbone as its input ids.
            segs: passed as ``token_type_ids``.
            mask: passed as ``attention_mask``.

        Returns:
            Element 0 of the backbone's output (the per-token hidden states
            that the caller indexes with the ``[CLS]`` positions).
        """
        outputs = self.model(x, attention_mask=mask, token_type_ids=segs)
        # Index instead of unpacking into exactly two names: indexing works
        # for plain tuples of any length as well as for ModelOutput objects,
        # whose iteration yields field names rather than tensors.
        return outputs[0]
top_vec[torch.arange(top_vec.size(0)).unsqueeze(1), clss] sents_vec = sents_vec * mask_cls[:, :, None].float() sent_scores = self.encoder(sents_vec, mask_cls).squeeze(-1) return sent_scores, mask_cls ================================================ FILE: bert-chinese-web/src/models/neural.py ================================================ import math import torch import torch.nn as nn def gelu(x): return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) class PositionwiseFeedForward(nn.Module): """ A two-layer Feed-Forward-Network with residual layer norm. Args: d_model (int): the size of input for the first-layer of the FFN. d_ff (int): the hidden layer size of the second-layer of the FNN. dropout (float): dropout probability in :math:`[0, 1)`. """ def __init__(self, d_model, d_ff, dropout=0.1): super(PositionwiseFeedForward, self).__init__() self.w_1 = nn.Linear(d_model, d_ff) self.w_2 = nn.Linear(d_ff, d_model) self.layer_norm = nn.LayerNorm(d_model, eps=1e-6) self.actv = gelu self.dropout_1 = nn.Dropout(dropout) self.dropout_2 = nn.Dropout(dropout) def forward(self, x): inter = self.dropout_1(self.actv(self.w_1(self.layer_norm(x)))) output = self.dropout_2(self.w_2(inter)) return output + x class MultiHeadedAttention(nn.Module): """ Multi-Head Attention module from "Attention is All You Need" :cite:`DBLP:journals/corr/VaswaniSPUJGKP17`. Similar to standard `dot` attention but uses multiple attention distributions simulataneously to select relevant items. .. mermaid:: graph BT A[key] B[value] C[query] O[output] subgraph Attn D[Attn 1] E[Attn 2] F[Attn N] end A --> D C --> D A --> E C --> E A --> F C --> F D --> O E --> O F --> O B --> O Also includes several additional tricks. 
Args: head_count (int): number of parallel heads model_dim (int): the dimension of keys/values/queries, must be divisible by head_count dropout (float): dropout parameter """ def __init__(self, head_count, model_dim, dropout=0.1, use_final_linear=True): assert model_dim % head_count == 0 self.dim_per_head = model_dim // head_count self.model_dim = model_dim super(MultiHeadedAttention, self).__init__() self.head_count = head_count self.linear_keys = nn.Linear(model_dim, head_count * self.dim_per_head) self.linear_values = nn.Linear(model_dim, head_count * self.dim_per_head) self.linear_query = nn.Linear(model_dim, head_count * self.dim_per_head) self.softmax = nn.Softmax(dim=-1) self.dropout = nn.Dropout(dropout) self.use_final_linear = use_final_linear if (self.use_final_linear): self.final_linear = nn.Linear(model_dim, model_dim) def forward(self, key, value, query, mask=None, layer_cache=None, type=None, predefined_graph_1=None): """ Compute the context vector and the attention vectors. 
Args: key (`FloatTensor`): set of `key_len` key vectors `[batch, key_len, dim]` value (`FloatTensor`): set of `key_len` value vectors `[batch, key_len, dim]` query (`FloatTensor`): set of `query_len` query vectors `[batch, query_len, dim]` mask: binary mask indicating which keys have non-zero attention `[batch, query_len, key_len]` Returns: (`FloatTensor`, `FloatTensor`) : * output context vectors `[batch, query_len, dim]` * one of the attention vectors `[batch, query_len, key_len]` """ # CHECKS # batch, k_len, d = key.size() # batch_, k_len_, d_ = value.size() # aeq(batch, batch_) # aeq(k_len, k_len_) # aeq(d, d_) # batch_, q_len, d_ = query.size() # aeq(batch, batch_) # aeq(d, d_) # aeq(self.model_dim % 8, 0) # if mask is not None: # batch_, q_len_, k_len_ = mask.size() # aeq(batch_, batch) # aeq(k_len_, k_len) # aeq(q_len_ == q_len) # END CHECKS batch_size = key.size(0) dim_per_head = self.dim_per_head head_count = self.head_count key_len = key.size(1) query_len = query.size(1) def shape(x): """ projection """ return x.view(batch_size, -1, head_count, dim_per_head) \ .transpose(1, 2) def unshape(x): """ compute context """ return x.transpose(1, 2).contiguous() \ .view(batch_size, -1, head_count * dim_per_head) # 1) Project key, value, and query. 
if layer_cache is not None: if type == "self": query, key, value = self.linear_query(query), \ self.linear_keys(query), \ self.linear_values(query) key = shape(key) value = shape(value) if layer_cache is not None: device = key.device if layer_cache["self_keys"] is not None: key = torch.cat( (layer_cache["self_keys"].to(device), key), dim=2) if layer_cache["self_values"] is not None: value = torch.cat( (layer_cache["self_values"].to(device), value), dim=2) layer_cache["self_keys"] = key layer_cache["self_values"] = value elif type == "context": query = self.linear_query(query) if layer_cache is not None: if layer_cache["memory_keys"] is None: key, value = self.linear_keys(key), \ self.linear_values(value) key = shape(key) value = shape(value) else: key, value = layer_cache["memory_keys"], \ layer_cache["memory_values"] layer_cache["memory_keys"] = key layer_cache["memory_values"] = value else: key, value = self.linear_keys(key), \ self.linear_values(value) key = shape(key) value = shape(value) else: key = self.linear_keys(key) value = self.linear_values(value) query = self.linear_query(query) key = shape(key) value = shape(value) query = shape(query) key_len = key.size(2) query_len = query.size(2) # 2) Calculate and scale scores. query = query / math.sqrt(dim_per_head) scores = torch.matmul(query, key.transpose(2, 3)) if mask is not None: mask = mask.unsqueeze(1).expand_as(scores) scores = scores.masked_fill(mask, -1e18) # 3) Apply attention dropout and compute context vectors. 
def use_gpu(opt):
    """
    Tell whether the options object `opt` asks for a GPU.

    True when `opt` has a non-empty `gpu_ranks` attribute, or when it has
    a `gpu` attribute greater than -1; False otherwise.
    """
    # A non-empty list of ranks is enough on its own.
    if hasattr(opt, 'gpu_ranks') and len(opt.gpu_ranks) > 0:
        return True
    # Otherwise fall back to the single-device index.
    return hasattr(opt, 'gpu') and opt.gpu > -1
class MultipleOptimizer(object):
    """ Drive several optimizers as one (needed for sparse adam) """

    def __init__(self, op):
        """ Keep `op`, the sequence of optimizers to drive together """
        self.optimizers = op

    def zero_grad(self):
        """ Call `zero_grad()` on every wrapped optimizer, in order """
        for wrapped in self.optimizers:
            wrapped.zero_grad()

    def step(self):
        """ Call `step()` on every wrapped optimizer, in order """
        for wrapped in self.optimizers:
            wrapped.step()

    @property
    def state(self):
        """ One dict merging each optimizer's `state`; on a key clash the
        optimizer that comes later in the sequence wins """
        merged = {}
        for wrapped in self.optimizers:
            merged.update(wrapped.state.items())
        return merged

    def state_dict(self):
        """ List with the `state_dict()` of each wrapped optimizer """
        collected = []
        for wrapped in self.optimizers:
            collected.append(wrapped.state_dict())
        return collected

    def load_state_dict(self, state_dicts):
        """ Load `state_dicts[i]` into the i-th wrapped optimizer; the two
        sequences must have the same length """
        assert len(state_dicts) == len(self.optimizers)
        for wrapped, saved in zip(self.optimizers, state_dicts):
            wrapped.load_state_dict(saved)
def set_parameters(self, params):
    """
    (Re-)create `self.optimizer` for the trainable parameters in `params`.

    Args:
        params: iterable of `(name, parameter)` pairs, e.g. the result
            of `model.named_parameters()`.

    Parameters with `requires_grad` unset are ignored. When the method
    is 'sparseadam', parameters whose name contains "embed" go to
    `self.sparse_params`; every other trainable parameter goes to
    `self.params`.

    Raises:
        RuntimeError: if `self.method` is not a supported optimizer name.
    """
    method = self.method
    dense = self.params = []
    sparse = self.sparse_params = []
    for name, param in params:
        if not param.requires_grad:
            continue
        if method == 'sparseadam' and "embed" in name:
            sparse.append(param)
        else:
            dense.append(param)

    lr = self.learning_rate
    if method == 'sgd':
        self.optimizer = optim.SGD(dense, lr=lr)
    elif method == 'adagrad':
        self.optimizer = optim.Adagrad(dense, lr=lr)
        # Overwrite the accumulator of each parameter with the configured
        # initial value.
        for group in self.optimizer.param_groups:
            for param in group['params']:
                slot = self.optimizer.state[param]
                slot['sum'] = slot['sum'].fill_(self.adagrad_accum)
    elif method == 'adadelta':
        self.optimizer = optim.Adadelta(dense, lr=lr)
    elif method == 'adam':
        self.optimizer = optim.Adam(dense, lr=lr,
                                    betas=self.betas, eps=1e-9)
    elif method == 'sparseadam':
        dense_adam = optim.Adam(dense, lr=lr,
                                betas=self.betas, eps=1e-8)
        sparse_adam = optim.SparseAdam(sparse, lr=lr,
                                       betas=self.betas, eps=1e-8)
        self.optimizer = MultipleOptimizer([dense_adam, sparse_adam])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
class LayerNormLSTMCell(nn.LSTMCell):
    """
    LSTM cell with layer normalisation.

    LayerNorm is applied separately to the input-to-hidden and the
    hidden-to-hidden gate projections, and to the new cell state before
    it is squashed into the hidden state.

    Gate layout along the `4 * hidden_size` axis, as sliced in `forward`:
    the first three chunks are the sigmoid gates (input, forget, output)
    and the last chunk is the tanh candidate.
    """

    def __init__(self, input_size, hidden_size, bias=True):
        super().__init__(input_size, hidden_size, bias)
        self.ln_ih = nn.LayerNorm(4 * hidden_size)
        self.ln_hh = nn.LayerNorm(4 * hidden_size)
        self.ln_ho = nn.LayerNorm(hidden_size)

    # The shape checks are implemented locally instead of calling
    # `check_forward_input` / `check_forward_hidden` on the base class:
    # those helpers appear to be missing from newer PyTorch releases, and
    # calling them there would fail with AttributeError.
    def _check_input(self, input):
        """Raise RuntimeError unless `input` is `[batch, input_size]`."""
        if input.size(1) != self.input_size:
            raise RuntimeError(
                "input has inconsistent input_size: got {}, expected {}"
                .format(input.size(1), self.input_size))

    def _check_hidden(self, input, hx, hidden_label=''):
        """Raise RuntimeError unless `hx` is `[batch, hidden_size]`."""
        if input.size(0) != hx.size(0):
            raise RuntimeError(
                "Input batch size {} doesn't match hidden{} batch size {}"
                .format(input.size(0), hidden_label, hx.size(0)))
        if hx.size(1) != self.hidden_size:
            raise RuntimeError(
                "hidden{} has inconsistent hidden_size: got {}, expected {}"
                .format(hidden_label, hx.size(1), self.hidden_size))

    def forward(self, input, hidden=None):
        """
        Run one time step.

        Args:
            input: tensor `[batch, input_size]`.
            hidden: optional `(hx, cx)` pair, each `[batch, hidden_size]`;
                zeros are used when omitted.

        Returns:
            `(hy, cy)`: the new hidden and cell state.
        """
        self._check_input(input)
        if hidden is None:
            hx = input.new_zeros(input.size(0), self.hidden_size,
                                 requires_grad=False)
            cx = input.new_zeros(input.size(0), self.hidden_size,
                                 requires_grad=False)
        else:
            hx, cx = hidden
        self._check_hidden(input, hx, '[0]')
        self._check_hidden(input, cx, '[1]')

        gates = self.ln_ih(F.linear(input, self.weight_ih, self.bias_ih)) \
            + self.ln_hh(F.linear(hx, self.weight_hh, self.bias_hh))
        i, f, o = gates[:, :(3 * self.hidden_size)].sigmoid().chunk(3, 1)
        g = gates[:, (3 * self.hidden_size):].tanh()

        cy = (f * cx) + (i * g)
        hy = o * self.ln_ho(cy).tanh()
        return hy, cy


class LayerNormLSTM(nn.Module):
    """
    Multi-layer, optionally bidirectional LSTM built from
    `LayerNormLSTMCell`s. Only time-major input `[seq_len, batch, dim]`
    is supported.
    """

    def __init__(self, input_size, hidden_size, num_layers=1, bias=True,
                 bidirectional=False):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional

        num_directions = 2 if bidirectional else 1
        # Layers above the first consume the concatenated directions.
        self.hidden0 = nn.ModuleList([
            LayerNormLSTMCell(
                input_size=(input_size if layer == 0
                            else hidden_size * num_directions),
                hidden_size=hidden_size, bias=bias)
            for layer in range(num_layers)
        ])

        if self.bidirectional:
            self.hidden1 = nn.ModuleList([
                LayerNormLSTMCell(
                    input_size=(input_size if layer == 0
                                else hidden_size * num_directions),
                    hidden_size=hidden_size, bias=bias)
                for layer in range(num_layers)
            ])

    def forward(self, input, hidden=None):
        """
        Run the whole sequence.

        Args:
            input: tensor `[seq_len, batch, input_size]`.
            hidden: optional `(hx, cx)` pair, each
                `[num_layers * num_directions, batch, hidden_size]`;
                zeros are used when omitted.

        Returns:
            `(y, (hy, cy))` where `y` is the top-layer output for every
            time step, `[seq_len, batch, num_directions * hidden_size]`,
            and `hy` / `cy` hold the final hidden / cell state of every
            layer and direction.
        """
        seq_len, batch_size, _ = input.size()  # supports TxNxH only
        num_directions = 2 if self.bidirectional else 1
        if hidden is None:
            hx = input.new_zeros(self.num_layers * num_directions,
                                 batch_size, self.hidden_size,
                                 requires_grad=False)
            cx = input.new_zeros(self.num_layers * num_directions,
                                 batch_size, self.hidden_size,
                                 requires_grad=False)
        else:
            hx, cx = hidden

        # Per-step outputs are kept in fresh lists. The previous version
        # built its bookkeeping with `[[None] * k] * seq_len`, which
        # repeats ONE inner list seq_len times, so every time step of `y`
        # ended up being the output of the last processed step.
        if self.bidirectional:
            xs = list(input)
            final_h, final_c = [], []
            for l, (layer0, layer1) in enumerate(zip(self.hidden0,
                                                     self.hidden1)):
                l0, l1 = 2 * l, 2 * l + 1
                h0, c0, h1, c1 = hx[l0], cx[l0], hx[l1], cx[l1]
                forward_out = [None] * seq_len
                backward_out = [None] * seq_len
                for t in range(seq_len):
                    h0, c0 = layer0(xs[t], (h0, c0))
                    forward_out[t] = h0
                    # The backward direction walks the sequence from the
                    # end and stores its output at the mirrored position.
                    rt = seq_len - 1 - t
                    h1, c1 = layer1(xs[rt], (h1, c1))
                    backward_out[rt] = h1
                xs = [torch.cat((fwd, bwd), dim=1)
                      for fwd, bwd in zip(forward_out, backward_out)]
                final_h.extend((h0, h1))
                final_c.extend((c0, c1))
            y = torch.stack(xs)
            hy = torch.stack(final_h)
            cy = torch.stack(final_c)
        else:
            h, c = list(hx), list(cx)
            outputs = []
            for x in input:
                for l, layer in enumerate(self.hidden0):
                    h[l], c[l] = layer(x, (h[l], c[l]))
                    x = h[l]
                outputs.append(x)
            y = torch.stack(outputs)
            hy = torch.stack(h)
            cy = torch.stack(c)

        return y, (hy, cy)
def str2bool(v):
    """
    Convert a command-line style truth value to `bool`.

    Accepts, case-insensitively, 'yes'/'true'/'t'/'y'/'1' as True and
    'no'/'false'/'f'/'n'/'0' as False. A value that is already a `bool`
    is returned unchanged, so the function can also be fed an argparse
    default such as `default=True` (previously that raised
    AttributeError because `bool` has no `.lower()`).

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised
            truth value.
    """
    if isinstance(v, bool):
        return v
    text = v.lower()
    if text in ('yes', 'true', 't', 'y', '1'):
        return True
    if text in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
class BatchExample(object):
    """
    Pad a list of single-document examples into one batch of tensors.

    Each element of `batch_example` must expose `src`, `segs` and `clss`
    tensors whose first row holds the values (the shape the code reads
    is `[1, length]`). After construction of a non-empty batch the
    instance carries:

    * `src`      - token ids, right-padded with 0
    * `segs`     - segment ids, right-padded with 0
    * `mask`     - True where `src` is not padding
    * `clss`     - sentence-start positions, padding replaced by 0
    * `mask_cls` - True where `clss` is a real position

    `len(batch)` is the number of examples; it is 0 for an empty or
    omitted `batch_example`, in which case no tensors are attached.
    """

    def _pad(self, data, pad_id, width=-1):
        """Right-pad every list in `data` with `pad_id` up to `width`
        (the length of the longest list when `width` is -1)."""
        if width == -1:
            width = max(len(d) for d in data)
        return [d + [pad_id] * (width - len(d)) for d in data]

    def __init__(self, batch_example=None, device=None):
        # Defined unconditionally: it used to be set only when a batch
        # was given, so len() on a default-constructed instance raised
        # AttributeError.
        self.batch_size = 0
        if batch_example is None:
            return
        self.batch_size = len(batch_example)
        if self.batch_size == 0:
            return

        pre_src = [e.src.tolist()[0] for e in batch_example]
        pre_segs = [e.segs.tolist()[0] for e in batch_example]
        pre_clss = [e.clss.tolist()[0] for e in batch_example]

        src = torch.tensor(self._pad(pre_src, 0))
        segs = torch.tensor(self._pad(pre_segs, 0))
        mask = ~(src == 0)

        # -1 marks padding while the mask is computed, then is replaced
        # by 0 so the positions stay valid indices.
        clss = torch.tensor(self._pad(pre_clss, -1))
        mask_cls = ~(clss == -1)
        clss[clss == -1] = 0

        self.clss = clss.to(device)
        self.mask_cls = mask_cls.to(device)
        self.src = src.to(device)
        self.segs = segs.to(device)
        self.mask = mask.to(device)

    def __len__(self):
        return self.batch_size
def preprocess(self, document: str, min_sent_num=3):
    """
    Turn a raw document into a model-ready `Example`.

    Args:
        document: the raw text.
        min_sent_num: documents with this many sentences or fewer are
            not encoded.

    Returns:
        `(example, doc_sents)`. `doc_sents` is the list of sentences
        the document was split into; `example` is None when there are
        not more than `min_sent_num` sentences.
    """
    doc_sents = doc_split(filter(document))
    if len(doc_sents) <= min_sent_num:
        return None, doc_sents

    # Characters are space-separated; sentences are joined by a
    # "[SEP] [CLS]" pair.
    spaced_sents = [' '.join(sent_token_split(sent)) for sent in doc_sents]
    joined = ' [SEP] [CLS] '.join(spaced_sents)

    # Hard limit: 510 content tokens plus the surrounding [CLS]/[SEP]
    # (the 512 length is hard-coded for BERT).
    body_tokens = self.tokenizer.tokenize(joined)[:510]
    token_ids = self.tokenizer.convert_tokens_to_ids(
        ['[CLS]'] + body_tokens + ['[SEP]'])

    # Segment ids alternate 0/1 per sentence; each segment runs up to and
    # including its [SEP] token.
    segments_ids = []
    previous_sep = -1
    sep_positions = [pos for pos, tid in enumerate(token_ids)
                     if tid == self.sep_vid]
    for seg_no, pos in enumerate(sep_positions):
        segments_ids.extend([seg_no % 2] * (pos - previous_sep))
        previous_sep = pos

    cls_ids = [pos for pos, tid in enumerate(token_ids)
               if tid == self.cls_vid]
    example = Example([token_ids, segments_ids, cls_ids], self.device)
    return example, doc_sents

金融文本摘要demo




@app.route('/api_summary', methods=("GET", "POST"))
def api_summary():
    """
    Summarise the text sent in the `doc` field of a POST request.

    Newlines are removed from the text first. Texts longer than
    `sum_model.max_process_len` go through `sum_model.long_predict`,
    shorter ones through `sum_model.predict`.

    Returns:
        The summary for a POST request, an empty string for a GET
        request, or a 400 response when the `doc` field is absent
        (previously that case raised KeyError and surfaced as HTTP 500).
    """
    if request.method != "POST":
        return ""

    doc = request.values.get('doc')
    if doc is None:
        return 'missing "doc" field', 400

    doc = doc.replace('\n', '')
    if len(doc) > sum_model.max_process_len:
        return sum_model.long_predict(doc)
    return sum_model.predict(doc)
================================================ # bertsum的数据处理 专门的数据处理小项目 ================================================ FILE: bert-sum-dataprocess/data/scope.csv ================================================ doc json sentence1。sentence2。sentence3。sentence4。sentence5。 [[{"sentence":"sentence1"},{"sentence":"sentence4"}]] ================================================ FILE: bert-sum-dataprocess/json_data/LCSTS.test.0.json ================================================ [ { "ids": [ 0, 1 ], "src": [ [ "本", "文", "总", "结", "了", "十", "个", "可", "穿", "戴", "产", "品", "的", "设", "计", "原", "则", "," ], [ "而", "这", "些", "原", "则", "," ], [ "同", "样", "也", "是", "笔", "者", "认", "为", "是", "这", "个", "行", "业", "最", "吸", "引", "人", "的", "地", "方", ":", "1", ".", "为", "人", "们", "解", "决", "重", "复", "性", "问", "题", ";" ], [ "2", ".", "从", "人", "开", "始", "," ], [ "而", "不", "是", "从", "机", "器", "开", "始", ";" ], [ "3", ".", "要", "引", "起", "注", "意", "," ], [ "但", "不", "要", "刻", "意", ";" ], [ "4", ".", "提", "升", "用", "户", "能", "力", "," ], [ "而", "不", "是", "取", "代", "人" ] ] }, { "ids": [ 0, 1 ], "src": [ [ "2", "0", "0", "7", "年", "乔", "布", "斯", "向", "人", "们", "展", "示", "i", "P", "h", "o", "n", "e", "并", "宣", "称", "“", "它", "将", "会", "改", "变", "世", "界", "”", ",", "还", "有", "人", "认", "为", "他", "在", "夸", "大", "其", "词", ",", "然", "而", "在", "8", "年", "后", ",", "以", "i", "P", "h", "o", "n", "e", "为", "代", "表", "的", "触", "屏", "智", "能", "手", "机", "已", "经", "席", "卷", "全", "球", "各", "个", "角", "落", "。" ], [ "未", "来", ",", "智", "能", "手", "机", "将", "会", "成", "为", "“", "真", "正", "的", "个", "人", "电", "脑", "”", ",", "为", "人", "类", "发", "展", "做", "出", "更", "大", "的", "贡", "献", "。" ] ] }, { "ids": [ 0, 1 ], "src": [ [ "雅", "虎", "发", "布", "2", "0", "1", "4", "年", "第", "四", "季", "度", "财", "报", ",", "并", "推", "出", "了", "免", "税", "方", "式", "剥", "离", "其", "持", "有", "的", "阿", "里", "巴", "巴", "集", "团", "1", "5", "%", "股", "权", "的", "计", "划", ",", "打", "算", "将", "这", "一", "价", "值", "约", "4", "0", "0", "亿", "美", "元", 
"的", "宝", "贵", "投", "资", "分", "配", "给", "股", "东", "。" ], [ "截", "止", "发", "稿", "前", ",", "雅", "虎", "股", "价", "上", "涨", "了", "大", "约", "7", "%", ",", "至", "5", "1", ".", "4", "5", "美", "元", "。" ] ] }, { "ids": [ 0, 1 ], "src": [ [ "2", "0", "1", "4", "年", ",", "5", "1", "信", "用", "卡", "管", "家", "跟", "宜", "信", "等", "P", "2", "P", "公", "司", "合", "作", ",", "推", "出", "线", "上", "信", "贷", "产", "品", "“", "瞬", "时", "贷", "”", ",", "其", "是", "一", "种", "纯", "在", "线", "操", "作", "的", "信", "贷", "模", "式", "。" ], [ "5", "1", "信", "用", "卡", "管", "家", "创", "始", "人", "孙", "海", "涛", "说", ",", "5", "1", "目", "前", "每", "天", "放", "贷", "1", "0", "0", "0", "万", ",", "预", "计", "2", "0", "1", "5", "年", ",", "自", "营", "产", "品", "加", "上", "瞬", "时", "贷", ",", "放", "贷", "额", "度", "将", "远", "超", "3", "0", "亿", "。" ] ] }, { "ids": [ 0, 1 ], "src": [ [ "目", "前", "世", "界", "上", "有", "着", "几", "百", "种", "编", "程", "语", "言", ",", "我", "应", "该", "学", "哪", "个", "?", "如", "何", "选", "择", "“", "正", "确", "”", "的", "编", "程", "语", "言", "进", "行", "学", "习", "?", "我", "所", "学", "的", "语", "言", "日", "后", "能", "否", "成", "为", "我", "获", "取", "好", "生", "活", "的", "保", "障", "?", "在", "这", "个", "问", "题", "上", ",", "很", "多", "人", "都", "曾", "经", "给", "出", "了", "他", "们", "都", "看", "法", "。" ], [ "但", "在", "我", "看", "来", ",", "这", "个", "问", "题", "答", "案", "其", "实", "非", "常", "简", "单", ":", "那", "就", "是", "J", "a", "v", "a", "S", "c", "r", "i", "p", "t", "。" ] ] }, { "ids": [ 0, 1 ], "src": [ [ "受", "众", "在", "哪", "里", ",", "媒", "体", "就", "应", "该", "在", "哪", "里", ",", "媒", "体", "的", "体", "制", "、", "内", "容", "、", "技", "术", "就", "应", "该", "向", "哪", "里", "转", "变", "。" ], [ "媒", "体", "融", "合", "关", "键", "是", "以", "人", "为", "本", ",", "即", "满", "足", "大", "众", "的", "信", "息", "需", "求", ",", "为", "受", "众", "提", "供", "更", "优", "质", "的", "服", "务", "。" ], [ "这", "就", "要", "求", "媒", "体", "在", "融", "合", "发", "展", "的", "过", "程", "中", ",", "既", "注", "重", "技", "术", "创", "新", ",", "又", "注", "重", "用", "户", "体", "验", "。" ] ] } ] 
================================================ FILE: bert-sum-dataprocess/json_data/LCSTS.train.0.json ================================================ [ { "ids": [ 0, 1 ], "src": [ [ "本", "文", "总", "结", "了", "十", "个", "可", "穿", "戴", "产", "品", "的", "设", "计", "原", "则", "," ], [ "而", "这", "些", "原", "则", "," ], [ "同", "样", "也", "是", "笔", "者", "认", "为", "是", "这", "个", "行", "业", "最", "吸", "引", "人", "的", "地", "方", ":", "1", ".", "为", "人", "们", "解", "决", "重", "复", "性", "问", "题", ";" ], [ "2", ".", "从", "人", "开", "始", "," ], [ "而", "不", "是", "从", "机", "器", "开", "始", ";" ], [ "3", ".", "要", "引", "起", "注", "意", "," ], [ "但", "不", "要", "刻", "意", ";" ], [ "4", ".", "提", "升", "用", "户", "能", "力", "," ], [ "而", "不", "是", "取", "代", "人" ] ] }, { "ids": [ 0, 1 ], "src": [ [ "2", "0", "0", "7", "年", "乔", "布", "斯", "向", "人", "们", "展", "示", "i", "P", "h", "o", "n", "e", "并", "宣", "称", "“", "它", "将", "会", "改", "变", "世", "界", "”", ",", "还", "有", "人", "认", "为", "他", "在", "夸", "大", "其", "词", ",", "然", "而", "在", "8", "年", "后", ",", "以", "i", "P", "h", "o", "n", "e", "为", "代", "表", "的", "触", "屏", "智", "能", "手", "机", "已", "经", "席", "卷", "全", "球", "各", "个", "角", "落", "。" ], [ "未", "来", ",", "智", "能", "手", "机", "将", "会", "成", "为", "“", "真", "正", "的", "个", "人", "电", "脑", "”", ",", "为", "人", "类", "发", "展", "做", "出", "更", "大", "的", "贡", "献", "。" ] ] }, { "ids": [ 0, 1 ], "src": [ [ "雅", "虎", "发", "布", "2", "0", "1", "4", "年", "第", "四", "季", "度", "财", "报", ",", "并", "推", "出", "了", "免", "税", "方", "式", "剥", "离", "其", "持", "有", "的", "阿", "里", "巴", "巴", "集", "团", "1", "5", "%", "股", "权", "的", "计", "划", ",", "打", "算", "将", "这", "一", "价", "值", "约", "4", "0", "0", "亿", "美", "元", "的", "宝", "贵", "投", "资", "分", "配", "给", "股", "东", "。" ], [ "截", "止", "发", "稿", "前", ",", "雅", "虎", "股", "价", "上", "涨", "了", "大", "约", "7", "%", ",", "至", "5", "1", ".", "4", "5", "美", "元", "。" ] ] }, { "ids": [ 0, 1 ], "src": [ [ "2", "0", "1", "4", "年", ",", "5", "1", "信", "用", "卡", "管", "家", "跟", "宜", "信", "等", "P", "2", "P", 
"公", "司", "合", "作", ",", "推", "出", "线", "上", "信", "贷", "产", "品", "“", "瞬", "时", "贷", "”", ",", "其", "是", "一", "种", "纯", "在", "线", "操", "作", "的", "信", "贷", "模", "式", "。" ], [ "5", "1", "信", "用", "卡", "管", "家", "创", "始", "人", "孙", "海", "涛", "说", ",", "5", "1", "目", "前", "每", "天", "放", "贷", "1", "0", "0", "0", "万", ",", "预", "计", "2", "0", "1", "5", "年", ",", "自", "营", "产", "品", "加", "上", "瞬", "时", "贷", ",", "放", "贷", "额", "度", "将", "远", "超", "3", "0", "亿", "。" ] ] }, { "ids": [ 0, 1 ], "src": [ [ "目", "前", "世", "界", "上", "有", "着", "几", "百", "种", "编", "程", "语", "言", ",", "我", "应", "该", "学", "哪", "个", "?", "如", "何", "选", "择", "“", "正", "确", "”", "的", "编", "程", "语", "言", "进", "行", "学", "习", "?", "我", "所", "学", "的", "语", "言", "日", "后", "能", "否", "成", "为", "我", "获", "取", "好", "生", "活", "的", "保", "障", "?", "在", "这", "个", "问", "题", "上", ",", "很", "多", "人", "都", "曾", "经", "给", "出", "了", "他", "们", "都", "看", "法", "。" ], [ "但", "在", "我", "看", "来", ",", "这", "个", "问", "题", "答", "案", "其", "实", "非", "常", "简", "单", ":", "那", "就", "是", "J", "a", "v", "a", "S", "c", "r", "i", "p", "t", "。" ] ] }, { "ids": [ 0, 1 ], "src": [ [ "受", "众", "在", "哪", "里", ",", "媒", "体", "就", "应", "该", "在", "哪", "里", ",", "媒", "体", "的", "体", "制", "、", "内", "容", "、", "技", "术", "就", "应", "该", "向", "哪", "里", "转", "变", "。" ], [ "媒", "体", "融", "合", "关", "键", "是", "以", "人", "为", "本", ",", "即", "满", "足", "大", "众", "的", "信", "息", "需", "求", ",", "为", "受", "众", "提", "供", "更", "优", "质", "的", "服", "务", "。" ], [ "这", "就", "要", "求", "媒", "体", "在", "融", "合", "发", "展", "的", "过", "程", "中", ",", "既", "注", "重", "技", "术", "创", "新", ",", "又", "注", "重", "用", "户", "体", "验", "。" ] ] } ] ================================================ FILE: bert-sum-dataprocess/json_data/scope.train.chunk_size_1.0.json ================================================ [{"src": [["s", "e", "n", "t", "e", "n", "c", "e", "1", "。"], ["s", "e", "n", "t", "e", "n", "c", "e", "2", "。"], ["s", "e", "n", "t", "e", "n", "c", "e", "3", "。"], ["s", "e", "n", 
import json
import re

# Compiled once at import time instead of on every call.
_TAG_RE = re.compile(r'<[^>]+>', re.S)
# Stops at the first closing brace. The previous pattern `{[^>]+}` ran to
# the LAST brace and deleted the text between two brace groups.
_BRACE_RE = re.compile(r'{[^}]+}', re.S)
_SENT_END_RE = re.compile(r'([。\?!;;])')
_DIRTY_KEYS = ('function()', 'show()', 'hide()')


def filter(x: str):
    """
    Strip markup from a piece of text.

    Removes `<...>` tags, `{...}` groups and ideographic spaces
    (U+3000), then trims surrounding whitespace. None, the string 'nan'
    and any non-string value (e.g. a float NaN read from a CSV) are
    returned unchanged.
    """
    if x is None or not isinstance(x, str) or x == 'nan':
        return x
    x = _TAG_RE.sub('', x)
    x = _BRACE_RE.sub('', x)
    return x.replace('\u3000', '').strip()


def have_dirty_key(doc):
    """
    Return True when `doc` is unusable: it is None, its string form is
    'nan', or it contains one of the JavaScript-looking markers
    'function()', 'show()', 'hide()'.
    """
    # Checked first: the membership test below raises TypeError on
    # None / float NaN.
    if doc is None or str(doc) == 'nan':
        return True
    return any(marker in doc for marker in _DIRTY_KEYS)


def paser_out_label(doc_sents: list, key_sents: list):
    """
    Find which document sentences are key sentences.

    A document sentence matches when it contains, or is contained in,
    any key sentence. Empty sentences never match (an empty string is a
    substring of everything and would otherwise be labelled as key).

    Returns:
        The list of matching indices into `doc_sents`, or None when
        nothing matched.
    """
    label_arr = []
    for idx, sent in enumerate(doc_sents):
        if not sent:
            continue
        if any(ks and (sent in ks or ks in sent) for ks in key_sents):
            label_arr.append(idx)

    match_num = len(label_arr)
    if match_num > 0 and match_num == len(key_sents):
        return label_arr

    # Not every key sentence was matched. This used to be detected with
    # `assert` inside a bare `except`, which silently changed behaviour
    # under `python -O`; the outcome is the same, stated explicitly.
    if any(have_dirty_key(ks) for ks in key_sents):
        print('关键句中存在脏数据,导致未匹配到')
    if match_num >= 1:
        return label_arr
    return None


def sent_token_split(doc: str):
    """Split a sentence into a list of single characters."""
    return list(doc)


def doc_split(doc: str):
    """
    Split a document into sentences, keeping each sentence-ending
    punctuation mark attached to the sentence it closes.
    """
    pieces = [str(piece) for piece in _SENT_END_RE.split(doc) if piece != '']
    # Pad so that a trailing sentence without punctuation still pairs up.
    pieces.append('')
    return [''.join(pair) for pair in zip(pieces[0::2], pieces[1::2])]


def format_to_json(doc_sents_arr, idx_arr):
    """Build one dataset record: character-split sentences under 'src'
    and the key-sentence indices under 'ids'."""
    token_docs = [sent_token_split(sent) for sent in doc_sents_arr]
    return {'src': token_docs, 'ids': idx_arr}


def _write_chunk(path, dataset):
    """Write `dataset` as one JSON document followed by a newline."""
    with open(path, 'w', encoding='utf-8') as save:
        save.write(json.dumps(dataset, ensure_ascii=False))
        save.write('\n')
    print('saved:', path)


def save_data_arr_to_json(data_arr_iter, chunk_size=2000,
                          file_name='data/json/train'):
    """
    Label the incoming documents and write them out in JSON chunks.

    Args:
        data_arr_iter: iterable of dicts with 'doc_sents' and
            'key_sents' lists.
        chunk_size: number of records per full output file.
        file_name: path prefix of the output files.

    Files are named `<file_name>.chunk_size_<records>.<index>.json`,
    where `<records>` is `chunk_size` for full chunks and the actual
    record count for the final partial chunk. Documents for which no key
    sentence could be located are skipped.
    """
    dataset = []
    p_ct = 0
    for data_item_i in data_arr_iter:
        label_arr = paser_out_label(data_item_i['doc_sents'],
                                    data_item_i['key_sents'])
        if not label_arr:
            continue
        dataset.append(format_to_json(data_item_i['doc_sents'], label_arr))
        if len(dataset) >= chunk_size:
            path = '{:s}.chunk_size_{:d}.{:d}.json'.format(
                file_name, chunk_size, p_ct)
            _write_chunk(path, dataset)
            dataset = []
            # Advance the chunk index; it used to stay at 0, so every
            # full chunk overwrote the previous one.
            p_ct += 1

    if dataset:
        path = '{:s}.chunk_size_{:d}.{:d}.json'.format(
            file_name, len(dataset), p_ct)
        _write_chunk(path, dataset)
================================================ ================================================ FILE: bertsum-chinese/.idea/inspectionProfiles/Project_Default.xml ================================================ ================================================ FILE: bertsum-chinese/.idea/inspectionProfiles/profiles_settings.xml ================================================ ================================================ FILE: bertsum-chinese/.idea/misc.xml ================================================ ================================================ FILE: bertsum-chinese/.idea/modules.xml ================================================ ================================================ FILE: bertsum-chinese/.idea/vcs.xml ================================================ ================================================ FILE: bertsum-chinese/LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: bertsum-chinese/README.md ================================================ # BERTSUM中文摘要 - 1.准备好json_data/ 下的那种样式的数据 - 2.运行preprocess_LAI.py把json数据转成pt形式的二进制数据 > 注意里面需要设置你自己的bert-base-chinese > > 如果json数据转换失败,为空[]之类,请debug `src/prepro/data_builder_LAI.py` 102-105行代码,从那里开始处理json数据 - 3.运行train_LAI.py 开始训练 > 在args_config.py 中指定好你的参数和bert-base-chinese依赖 > # 大致原理 - (不懂的加QQ/微信,**小白,连Python、Pytorch都没入门的不要加了。🙏 浪费大家时间,先学基础**) - 对文本分句后,对每个句子做0/1分类,1是关键句,即关键句分类。PS:文本长度超过512,切成多段低于512的。 - 例如文本有7句话,则是对7个[CLS]位置的向量输出0/1预测 - 抽取式摘要,效果不好一概而论,有好有坏,整体还不错 ## 数据没有? 
可以百度的接口生成一些训练数据,是抽取式摘要的,可以免费调用50w次 参考里面的“新闻摘要”: https://cloud.baidu.com/product/nlp ```python # -*- coding: utf-8 -*- from aip import AipNlp # 去注册生成你的 APP_ID = '22222' API_KEY = 'xxxx' SECRET_KEY = 'xxxxx' client = AipNlp(APP_ID, API_KEY, SECRET_KEY) content = "3月6日,自治区政府印发划转部分国有资本充实社保基金实施方案的通知。当前,在推动国有企业深化改革的同时,通过划转部分国有资本充实社保基金,使人民群众共享国有企业发展成果,增进民生福祉,促进改革和完善基本养老保险制度,实现代际公平,增强制度的可持续性。划转范围。为我区国有及国有控股大中型企业、金融机构纳入划转范围。公益类企业、文化企业以及国务院另有规定的除外。划转对象。一是由自治区国资委监管或直接持有纳入划转范围的国有股权。二是由自治区有关部门(单位)监管或直接持有纳入划转范围的国有股权。三是由市、县(区)人民政府直接持有纳入划转范围的国有股权。划转对象涉及多个国有股东的,按照不重复划转原则进行划转。中央和地方混合持股的企业,按照第一大股东产权归属关系进行划转。划转比例。划转比例统一为纳入划转范围企业国有股权的10%。以后根据中央政策规定和我区基本养老保险基金缺口适时调整。划转基准日。本次国有股权划转原则上以2019年12月31日作为划转基准日。后续如有符合划转条件的企业,以上一年度末作为划转基准日。承接主体。我区划转的企业国有股权,委托自治区财政厅履行出资人职责的企业作为全区唯一承接主体,负责集中统一持有、专户管理和独立运营。各市、县(区)不再设立承接主体。国有资产直接划拨等制度性安排,社保基金的力量不断壮大,为我国现行养老制度的存续提供了充分安全可靠的后盾和保障。在这个过程里,国有资产的划入起到了至关重要的支柱性作用,而这也是国有资产社会使命的充分落实。" maxSummaryLen = 300 res = client.newsSummary(content, maxSummaryLen) print(res['summary']) # options = {} # options["title"] = "标题" # client.newsSummary(content, maxSummaryLen, options) ``` ================================================ FILE: bertsum-chinese/args_config.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2020/3/3 3:13 PM # @Author : xinfa.jiang # @Site : # @File : args_config.py # @Software: PyCharm import argparse from src.others.utils import str2bool import os root = os.path.abspath(os.path.dirname(__file__)) results_path = os.path.join(root, 'results') models_path = os.path.join(root, 'models') # bert_base_chinese = os.path.join(root, 'bert-base-chinese') bert_base_chinese = '/Users/jiang/Documents/bert/bert-base-chinese' parser = argparse.ArgumentParser() parser.add_argument("-encoder", default='classifier', type=str, choices=['classifier']) # 训练还是测试,目前支持 train , test parser.add_argument("-mode", default='train', type=str, choices=['train', 'test']) # bert_data_path:训练的pt数据目录,bert_data/LCSTS : 
# -bert_data_path is a path prefix: per the original note, the data files in
# that directory whose names start with "LCSTS" are the ones picked up.
_add_arg = parser.add_argument

# Data, model and output locations.
_add_arg("-bert_data_path", default="bert_data/LCSTS")
_add_arg("-model_path", default="models/bert_classifier")
_add_arg("-result_path", default="results/result")
_add_arg("-temp_dir", default="temp")

# Required: the config file inside the pretrained PyTorch bert-base-chinese
# model directory.
bert_mode_json_path = os.path.join(bert_base_chinese, "config.json")
_add_arg("-bert_config_path", default=bert_mode_json_path)

# Model hyper-parameters.
_add_arg("-batch_size", type=int, default=600)
_add_arg("-use_interval", type=str2bool, nargs="?", const=True, default=True)
_add_arg("-hidden_size", type=int, default=128)
_add_arg("-ff_size", type=int, default=2048)
_add_arg("-heads", type=int, default=8)
_add_arg("-inter_layers", type=int, default=2)
_add_arg("-rnn_size", type=int, default=512)

# Initialisation and regularisation.
_add_arg("-param_init", type=float, default=0)
_add_arg("-param_init_glorot", type=str2bool, nargs="?", const=True, default=True)
_add_arg("-dropout", type=float, default=0.1)

# Optimiser settings.
_add_arg("-optim", type=str, default="adam")
_add_arg("-lr", type=float, default=2e-3)
_add_arg("-beta1", type=float, default=0.9)
_add_arg("-beta2", type=float, default=0.999)
_add_arg("-decay_method", type=str, default="noam")
_add_arg("-warmup_steps", type=int, default=8000)
_add_arg("-max_grad_norm", type=float, default=0)

_add_arg("-recall_eval", type=str2bool, nargs="?", const=True, default=False)

_add_arg("-save_checkpoint_steps", type=int, default=1000)
# Number of batches trained together (original note: 3 batches).
_add_arg("-accum_count", type=int, default=3)
# Maximum number of training steps.
_add_arg("-train_steps", type=int, default=40000)

# Devices and logging.
_add_arg("-visible_gpus", type=str, default="-1")
_add_arg("-gpu_ranks", type=str, default="0")
_add_arg("-log_file", default="logs/bert_classifier")
parser.add_argument('-seed', default=666, type=int) # 在test的时候有用,告诉加载哪个保存的step模型进行预测 parser.add_argument("-test_from", default='') # 训练制定起始模型,没有这个,请设置为空 :'' ,有的话会基于这个模型增量训练 parser.add_argument("-train_from", default='') # 必须:预训练的pytorch 的bert-base-chinese模型路径 parser.add_argument("-bert_base_chinese", type=str, default=bert_base_chinese) args = parser.parse_args() ================================================ FILE: bertsum-chinese/bert_data/LCSTS.train.1.bert.pt ================================================ [File too large to display: 36.7 MB] ================================================ FILE: bertsum-chinese/json_data/LCSTS.train.1.json ================================================ [File too large to display: 24.4 MB] ================================================ FILE: bertsum-chinese/logs/bert_classifier ================================================ [2020-03-06 18:31:04,797 INFO] Device ID -1 [2020-03-06 18:31:04,797 INFO] Device cpu [2020-03-06 18:31:04,803 INFO] loading archive file ../bert-base-chinese [2020-03-06 18:31:04,806 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-06 18:31:07,728 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( 
(query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, 
bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): 
BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): 
Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-06 18:31:07,747 INFO] * number of parameters: 102268417 [2020-03-06 18:31:07,748 INFO] Start training... [2020-03-06 18:31:07,750 INFO] Loading train dataset from ../bert_data_test\LCSTS.train.0.bert.pt, number of examples: 2 [2020-03-06 18:31:07,750 INFO] loaded:../bert_data_test\LCSTS.train.0.bert.pt [2020-03-06 18:31:07,849 INFO] Loading train dataset from ../bert_data_test\LCSTS.train.0.bert.pt, number of examples: 2 [2020-03-06 18:31:07,849 INFO] loaded:../bert_data_test\LCSTS.train.0.bert.pt [2020-03-06 18:31:07,900 INFO] Loading train dataset from ../bert_data_test\LCSTS.train.0.bert.pt, number of examples: 2 [2020-03-06 18:31:07,900 INFO] loaded:../bert_data_test\LCSTS.train.0.bert.pt [2020-03-06 18:31:18,816 INFO] Saving checkpoint ../models/bert_classifier\model_step_1.pt [2020-03-06 18:31:21,935 INFO] Loading train dataset from ../bert_data_test\LCSTS.train.0.bert.pt, number of examples: 2 [2020-03-06 18:31:21,936 INFO] loaded:../bert_data_test\LCSTS.train.0.bert.pt [2020-03-06 18:31:21,983 INFO] Loading train dataset from ../bert_data_test\LCSTS.train.0.bert.pt, number of examples: 2 [2020-03-06 18:31:21,983 INFO] 
loaded:../bert_data_test\LCSTS.train.0.bert.pt [2020-03-06 18:31:22,026 INFO] Loading train dataset from ../bert_data_test\LCSTS.train.0.bert.pt, number of examples: 2 [2020-03-06 18:31:22,026 INFO] loaded:../bert_data_test\LCSTS.train.0.bert.pt [2020-03-06 18:31:32,436 INFO] Saving checkpoint ../models/bert_classifier\model_step_2.pt [2020-03-06 18:31:35,543 INFO] Loading train dataset from ../bert_data_test\LCSTS.train.0.bert.pt, number of examples: 2 [2020-03-06 18:31:35,543 INFO] loaded:../bert_data_test\LCSTS.train.0.bert.pt [2020-03-06 18:31:35,609 INFO] Loading train dataset from ../bert_data_test\LCSTS.train.0.bert.pt, number of examples: 2 [2020-03-06 18:31:35,610 INFO] loaded:../bert_data_test\LCSTS.train.0.bert.pt [2020-03-06 18:31:35,664 INFO] Loading train dataset from ../bert_data_test\LCSTS.train.0.bert.pt, number of examples: 2 [2020-03-06 18:31:35,664 INFO] loaded:../bert_data_test\LCSTS.train.0.bert.pt [2020-03-06 18:35:01,661 INFO] Loading checkpoint from ../models/bert_classifier/model_step_2.pt [2020-03-06 18:35:04,459 INFO] Loading test dataset from ../bert_data_test\LCSTS.test.0.bert.pt, number of examples: 2 [2020-03-06 18:35:04,460 INFO] loaded:../bert_data_test\LCSTS.test.0.bert.pt [2020-03-06 18:35:04,468 INFO] * number of parameters: 102268417 [2020-03-09 13:50:29,412 INFO] Device ID -1 [2020-03-09 13:50:30,091 INFO] Device cpu [2020-03-09 13:50:38,874 INFO] loading archive file ../bert-base-chinese [2020-03-09 13:50:38,881 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-09 13:50:46,518 INFO] 
Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, 
out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( 
(query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, 
bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-09 13:50:48,754 INFO] * number of parameters: 102268417 [2020-03-09 13:50:54,551 INFO] Start training... 
[2020-03-09 13:51:53,116 INFO] Device ID -1 [2020-03-09 13:51:53,116 INFO] Device cpu [2020-03-09 13:51:53,121 INFO] loading archive file ../bert-base-chinese [2020-03-09 13:51:53,123 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-09 13:51:55,456 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, 
out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( 
(query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, 
bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-09 13:51:55,502 INFO] * number of parameters: 102268417 [2020-03-09 13:51:55,507 
INFO] Start training... [2020-03-09 14:23:00,316 INFO] Device ID -1 [2020-03-09 14:23:00,317 INFO] Device cpu [2020-03-09 14:23:00,323 INFO] loading archive file ../bert-base-chinese [2020-03-09 14:23:00,326 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-09 14:23:02,587 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): 
Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( 
(self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): 
Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): 
Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-09 14:23:02,623 INFO] * number of 
parameters: 102268417 [2020-03-09 14:23:02,628 INFO] Start training... [2020-03-09 14:42:11,510 INFO] Device ID -1 [2020-03-09 14:42:11,511 INFO] Device cpu [2020-03-09 14:42:11,515 INFO] loading archive file ../bert-base-chinese [2020-03-09 14:42:11,517 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-09 14:42:13,895 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, 
out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): 
BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): 
BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() 
(dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-09 14:42:13,930 INFO] * 
number of parameters: 102268417 [2020-03-09 14:42:13,935 INFO] Start training... [2020-03-09 14:44:38,922 INFO] Device ID -1 [2020-03-09 14:44:38,923 INFO] Device cpu [2020-03-09 14:44:38,927 INFO] loading archive file ../bert-base-chinese [2020-03-09 14:44:38,929 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-09 14:44:41,225 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): 
Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, 
inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, 
out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, 
out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) 
(sigmoid): Sigmoid() ) ) [2020-03-09 14:44:41,258 INFO] * number of parameters: 102268417 [2020-03-09 14:44:41,262 INFO] Start training... [2020-03-09 14:44:41,513 INFO] Loading train dataset from ../bert_data_test\chinascope.train.5.bert.pt, number of examples: 4700 [2020-03-09 14:44:41,513 INFO] loaded:../bert_data_test\chinascope.train.5.bert.pt [2020-03-10 13:35:18,312 INFO] Loading checkpoint from ../models/bert_classifier/model_step_10001.pt [2020-03-10 14:21:31,344 INFO] Loading checkpoint from ../models/bert_classifier/model_step_10001.pt [2020-03-10 14:23:56,777 INFO] Loading checkpoint from ../models/bert_classifier/model_step_10001.pt [2020-03-10 14:24:00,485 INFO] Loading test dataset from ../bert_data_test\chinascope.test.0.bert.pt, number of examples: 4577 [2020-03-10 14:24:00,486 INFO] loaded:../bert_data_test\chinascope.test.0.bert.pt [2020-03-10 14:24:00,594 INFO] * number of parameters: 102268417 [2020-03-10 14:36:34,492 INFO] Device ID -1 [2020-03-10 14:36:34,493 INFO] Device cpu [2020-03-10 14:36:34,501 INFO] loading archive file ../bert-base-chinese [2020-03-10 14:36:34,503 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-10 14:36:37,436 INFO] Loading checkpoint from ../models/bert_classifier/model_step_10001.pt [2020-03-10 14:36:39,724 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): 
Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): 
BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): 
Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, 
inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-10 14:36:39,776 INFO] * number of parameters: 102268417 [2020-03-10 14:36:39,780 INFO] Start training... 
[2020-03-10 14:36:40,050 INFO] Loading train dataset from ../bert_data_test\chinascope.train.5.bert.pt, number of examples: 4704 [2020-03-10 14:36:40,051 INFO] loaded:../bert_data_test\chinascope.train.5.bert.pt [2020-03-10 14:37:05,574 INFO] Device ID -1 [2020-03-10 14:37:05,574 INFO] Device cpu [2020-03-10 14:37:07,069 INFO] loading archive file ../bert-base-chinese [2020-03-10 14:37:07,074 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-10 14:37:12,320 INFO] Loading checkpoint from ../models/bert_classifier/model_step_10001.pt [2020-03-10 14:37:45,133 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) 
(LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): 
BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): 
Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, 
inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-10 14:37:45,967 INFO] * number of parameters: 102268417 [2020-03-10 14:37:50,463 INFO] Start training... [2020-03-10 14:37:52,290 INFO] Loading train dataset from ../bert_data_test\chinascope.train.5.bert.pt, number of examples: 4704 [2020-03-10 14:37:52,293 INFO] loaded:../bert_data_test\chinascope.train.5.bert.pt [2020-03-10 14:38:30,324 INFO] Device ID -1 [2020-03-10 14:38:30,325 INFO] Device cpu [2020-03-10 14:38:38,066 INFO] loading archive file ../bert-base-chinese [2020-03-10 14:38:38,074 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-10 14:38:40,826 INFO] Loading checkpoint from ../models/bert_classifier/model_step_10001.pt [2020-03-10 14:38:43,479 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, 
inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, 
bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): 
Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, 
out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, 
out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-10 14:38:43,777 INFO] * number of parameters: 102268417 [2020-03-10 14:38:43,799 INFO] Start training... [2020-03-10 14:38:44,093 INFO] Loading train dataset from ../bert_data_test\chinascope.train.5.bert.pt, number of examples: 4704 [2020-03-10 14:38:44,095 INFO] loaded:../bert_data_test\chinascope.train.5.bert.pt [2020-03-10 14:44:28,634 INFO] Device ID -1 [2020-03-10 14:44:28,635 INFO] Device cpu [2020-03-10 14:44:28,642 INFO] loading archive file ../bert-base-chinese [2020-03-10 14:44:28,644 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-10 14:44:31,756 INFO] Loading checkpoint from ../models/bert_classifier/model_step_10001.pt [2020-03-10 14:44:35,697 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, 
inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( 
(dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): 
Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, 
out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): 
BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-10 14:44:35,738 INFO] * number of parameters: 102268417 [2020-03-10 14:44:35,741 INFO] Start training... 
[2020-03-10 14:44:36,013 INFO] Loading train dataset from ../bert_data_test\chinascope.train.5.bert.pt, number of examples: 4704 [2020-03-10 14:44:36,014 INFO] loaded:../bert_data_test\chinascope.train.5.bert.pt [2020-03-17 18:15:43,626 INFO] Device ID -1 [2020-03-17 18:15:44,273 INFO] Device cpu [2020-03-17 18:17:25,694 INFO] loading archive file ../bert-base-chinese [2020-03-17 18:17:25,701 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-17 18:17:35,999 INFO] Loading checkpoint from ../models/bert_classifier/model_step_10001.pt [2020-03-17 18:18:20,053 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) 
(LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): 
BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): 
Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, 
inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-17 18:18:22,813 INFO] * number of parameters: 102268417 [2020-03-17 18:19:48,639 INFO] Start training... [2020-03-17 18:21:19,410 INFO] Device ID -1 [2020-03-17 18:21:19,411 INFO] Device cpu [2020-03-17 18:21:19,416 INFO] loading archive file ../bert-base-chinese [2020-03-17 18:21:19,419 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-17 18:21:21,984 INFO] Loading checkpoint from ../models/bert_classifier/model_step_10001.pt [2020-03-17 18:21:24,023 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( 
(dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): 
Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, 
out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): 
BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) 
(output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-17 18:21:24,066 INFO] * number of parameters: 102268417 [2020-03-17 18:21:30,500 INFO] Start training... [2020-03-17 18:23:30,925 INFO] Device ID -1 [2020-03-17 18:23:30,926 INFO] Device cpu [2020-03-17 18:23:30,931 INFO] loading archive file ../bert-base-chinese [2020-03-17 18:23:30,934 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-17 18:23:33,220 INFO] Loading checkpoint from ../models/bert_classifier/model_step_10001.pt [2020-03-17 18:23:35,563 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, 
out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): 
Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( 
(self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): 
Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): 
Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-17 18:23:35,598 INFO] * number of parameters: 102268417 [2020-03-17 18:23:40,040 INFO] Start training... [2020-03-17 18:24:10,076 INFO] Loading train dataset from ../train_data\chinascope.train.5.bert.pt, number of examples: 4704 [2020-03-17 18:24:10,077 INFO] loaded:../train_data\chinascope.train.5.bert.pt [2020-03-19 10:03:55,966 INFO] Device ID -1 [2020-03-19 10:03:55,966 INFO] Device cpu [2020-03-19 10:03:55,982 ERROR] Model name '/appcom/apps/chengmengli704/pretrained_model/bert_base/bert-base-chinese' was not found in model name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese). We assumed '/appcom/apps/chengmengli704/pretrained_model/bert_base/bert-base-chinese' was a path or url but couldn't find any file associated to this path or url. [2020-03-19 10:04:47,909 INFO] Device ID -1 [2020-03-19 10:04:47,909 INFO] Device cpu [2020-03-19 10:04:48,081 INFO] https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz not found in cache, downloading to D:\Users\JIANGXINFA895\AppData\Local\Temp\tmpgo72gmtg [2020-03-19 10:04:48,081 ERROR] Model name 'bert-base-chinese' was not found in model name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese). 
We assumed 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz' was a path or url but couldn't find any file associated to this path or url. [2020-03-19 10:06:04,684 INFO] Device ID -1 [2020-03-19 10:06:04,684 INFO] Device cpu [2020-03-19 10:06:04,824 INFO] https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz not found in cache, downloading to D:\Users\JIANGXINFA895\AppData\Local\Temp\tmpr4_p506s [2020-03-19 10:06:04,840 ERROR] Model name 'bert-base-chinese' was not found in model name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese). We assumed 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz' was a path or url but couldn't find any file associated to this path or url. [2020-03-19 10:08:05,041 INFO] Device ID -1 [2020-03-19 10:08:05,041 INFO] Device cpu [2020-03-19 10:10:52,014 INFO] Device ID -1 [2020-03-19 10:10:52,014 INFO] Device cpu [2020-03-19 10:10:52,014 INFO] loading archive file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-master\bert-base-chinese [2020-03-19 10:10:52,014 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-19 10:10:55,077 INFO] Loading checkpoint from models/bert_classifier/model_step_10001.pt [2020-03-19 10:10:59,078 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) 
(token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): 
BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, 
bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): 
Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, 
bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-19 10:10:59,125 INFO] * number of parameters: 102268417 [2020-03-19 10:10:59,125 INFO] Start training... 
[2020-03-19 10:33:45,231 INFO] Device ID -1 [2020-03-19 10:33:45,231 INFO] Device cpu [2020-03-19 10:34:14,420 INFO] loading archive file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-master\bert-base-chinese [2020-03-19 10:34:14,426 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-19 10:35:55,953 INFO] Loading checkpoint from models/bert_classifier/model_step_10001.pt [2020-03-19 10:37:03,039 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): 
Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) 
(LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): 
BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): 
Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-19 10:37:49,578 INFO] * number of parameters: 102268417 [2020-03-19 10:37:49,582 INFO] Start training... [2020-03-19 10:37:49,777 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-03-19 10:37:49,778 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-03-19 10:38:09,806 INFO] Device ID -1 [2020-03-19 10:38:09,807 INFO] Device cpu [2020-03-19 10:38:19,760 INFO] Device ID -1 [2020-03-19 10:38:19,761 INFO] Device cpu [2020-03-19 10:38:19,767 INFO] loading archive file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-master\bert-base-chinese [2020-03-19 10:38:19,769 INFO] Model config { "attention_probs_dropout_prob": 0.1, "directionality": "bidi", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "max_position_embeddings": 512, "num_attention_heads": 12, "num_hidden_layers": 12, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "type_vocab_size": 2, "vocab_size": 21128 } [2020-03-19 10:38:22,718 INFO] Loading checkpoint from models/bert_classifier/model_step_10001.pt [2020-03-19 10:38:26,512 INFO] model load success............ 
[2020-03-19 10:38:26,512 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) 
(value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): 
BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): 
Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): 
Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): BertLayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-19 10:38:26,533 INFO] * number of parameters: 102268417 [2020-03-19 10:38:26,534 INFO] Start training... 
[2020-03-19 11:18:56,525 INFO] Device ID -1 [2020-03-19 11:18:57,067 INFO] Device cpu [2020-03-19 11:24:58,286 INFO] Device ID -1 [2020-03-19 11:24:58,625 INFO] Device cpu [2020-03-19 11:25:09,139 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-transfo\bert-base-chinese\config.json [2020-03-19 11:25:09,151 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-03-19 11:25:09,154 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-transfo\bert-base-chinese\pytorch_model.bin [2020-03-19 11:25:39,414 INFO] model load success............ 
[2020-03-19 11:25:40,186 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): 
BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), 
eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): 
BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, 
elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): 
Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-19 11:25:41,205 INFO] * number of parameters: 102268417 [2020-03-19 11:25:46,239 INFO] Start training... [2020-03-19 11:25:49,834 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-03-19 11:25:49,836 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-03-19 11:27:20,650 INFO] Device ID -1 [2020-03-19 11:27:21,188 INFO] Device cpu [2020-03-19 11:27:24,140 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-transfo\bert-base-chinese\config.json [2020-03-19 11:27:24,147 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-03-19 11:27:24,149 INFO] loading weights file 
D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-transfo\bert-base-chinese\pytorch_model.bin [2020-03-19 11:27:35,824 INFO] model load success............ [2020-03-19 11:27:35,825 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): 
Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, 
bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, 
out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): 
Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, 
bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-19 11:27:35,981 INFO] * number of parameters: 102268417 [2020-03-19 11:27:35,982 INFO] Start training... [2020-03-19 11:27:36,279 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-03-19 11:27:36,280 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-03-19 11:32:16,715 INFO] Device ID -1 [2020-03-19 11:32:16,716 INFO] Device cpu [2020-03-19 11:32:16,723 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-transfo\bert-base-chinese\config.json [2020-03-19 11:32:16,726 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, 
"type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-03-19 11:32:16,727 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-transfo\bert-base-chinese\pytorch_model.bin [2020-03-19 11:32:20,277 INFO] model load success............ [2020-03-19 11:32:20,277 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, 
bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( 
(dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): 
Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): 
Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-19 11:32:20,305 INFO] * number of parameters: 102268417 [2020-03-19 11:32:20,305 INFO] Start training... [2020-03-19 11:32:20,610 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-03-19 11:32:20,610 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-03-19 11:39:30,493 INFO] Device ID -1 [2020-03-19 11:39:30,494 INFO] Device cpu [2020-03-19 11:39:30,503 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-transfo\bert-base-chinese\config.json [2020-03-19 11:39:30,508 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": 
"first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-03-19 11:39:30,509 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-transfo\bert-base-chinese\pytorch_model.bin [2020-03-19 11:39:33,977 INFO] model load success............ [2020-03-19 11:39:33,977 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, 
bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, 
inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) 
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) 
(9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): 
LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-19 11:39:34,000 INFO] * number of parameters: 102268417 [2020-03-19 11:39:34,000 INFO] Start training... [2020-03-19 11:39:34,291 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-03-19 11:39:34,292 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-03-19 13:32:46,320 INFO] Device ID -1 [2020-03-19 13:32:46,321 INFO] Device cpu [2020-03-19 13:32:46,325 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-transfo\bert-base-chinese\config.json [2020-03-19 13:32:46,326 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": 
null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-03-19 13:32:46,326 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-transfo\bert-base-chinese\pytorch_model.bin [2020-03-19 13:32:49,115 INFO] model load success............ [2020-03-19 13:32:49,116 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, 
out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): 
Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, 
bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, 
out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): 
Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-19 13:32:49,133 INFO] * number of parameters: 102268417 [2020-03-19 13:32:49,133 INFO] Start training... [2020-03-19 13:32:49,372 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-03-19 13:32:49,372 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-03-19 14:08:53,613 INFO] Device ID -1 [2020-03-19 14:08:53,614 INFO] Device cpu [2020-03-19 14:08:53,621 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-transfo\bert-base-chinese\config.json [2020-03-19 14:08:53,622 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, 
"num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-03-19 14:08:53,624 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese-transfo\bert-base-chinese\pytorch_model.bin [2020-03-19 14:08:56,978 INFO] model load success............ [2020-03-19 14:08:56,979 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): 
Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, 
inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): 
Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, 
inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): 
Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-03-19 14:08:56,993 INFO] * number of parameters: 102268417 [2020-03-19 14:08:56,994 INFO] Start training... 
[2020-03-19 14:08:57,242 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.17.bert.pt, number of examples: 1955 [2020-03-19 14:08:57,243 INFO] loaded:train_data\gonggao.train.sharesize_2000.17.bert.pt [2020-04-25 16:17:31,789 INFO] Device ID -1 [2020-04-25 16:17:32,520 INFO] Device cpu [2020-04-25 16:17:38,322 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\config.json [2020-04-25 16:17:38,337 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-04-25 16:17:38,340 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\pytorch_model.bin [2020-04-25 16:17:44,875 INFO] Loading checkpoint from models/bert_classifier/model_step_20142.pt [2020-04-25 16:18:02,309 INFO] model load success............ 
[2020-04-25 16:18:02,314 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): 
BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), 
eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): 
BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, 
elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): 
Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-04-25 16:18:02,713 INFO] * number of parameters: 102268417 [2020-04-25 16:18:14,407 INFO] Start training... [2020-04-25 16:18:25,769 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-04-25 16:18:25,775 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-04-25 16:18:30,135 INFO] Saving checkpoint models/bert_classifier\model_step_36424.pt [2020-04-25 16:22:17,791 INFO] Device ID -1 [2020-04-25 16:22:17,793 INFO] Device cpu [2020-04-25 16:22:17,818 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\config.json [2020-04-25 16:22:17,835 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } 
[2020-04-25 16:22:17,838 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\pytorch_model.bin [2020-04-25 16:22:21,570 INFO] Loading checkpoint from models/bert_classifier/model_step_20142.pt [2020-04-25 16:22:25,940 INFO] model load success............ [2020-04-25 16:22:25,941 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) 
) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, 
out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, 
bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( 
(dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-04-25 16:22:26,102 INFO] * number of parameters: 102268417 [2020-04-25 16:22:26,112 INFO] Start training... [2020-04-25 16:22:26,311 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-04-25 16:22:26,313 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-04-25 16:31:41,857 INFO] Device ID -1 [2020-04-25 16:31:41,858 INFO] Device cpu [2020-04-25 16:31:41,867 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\config.json [2020-04-25 16:31:41,869 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": 
"first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-04-25 16:31:41,870 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\pytorch_model.bin [2020-04-25 16:31:44,876 INFO] Loading checkpoint from models/bert_classifier/model_step_20142.pt [2020-04-25 16:31:46,968 INFO] model load success............ [2020-04-25 16:31:46,969 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, 
inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) 
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) 
(output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): 
LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): 
BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-04-25 16:31:46,999 INFO] * number of parameters: 102268417 [2020-04-25 16:31:47,000 INFO] Start training... [2020-04-25 16:31:47,177 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-04-25 16:31:47,178 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-04-25 16:34:06,079 INFO] Device ID -1 [2020-04-25 16:34:06,080 INFO] Device cpu [2020-04-25 16:34:06,087 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\config.json [2020-04-25 16:34:06,090 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, 
"output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-04-25 16:34:06,091 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\pytorch_model.bin [2020-04-25 16:34:08,922 INFO] Loading checkpoint from models/bert_classifier/model_step_20142.pt [2020-04-25 16:34:10,957 INFO] model load success............ [2020-04-25 16:34:10,957 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): 
BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): 
Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): 
Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, 
inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): 
Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-04-25 16:34:10,979 INFO] * number of parameters: 102268417 [2020-04-25 16:34:10,980 INFO] Start training... 
[2020-04-25 16:34:11,280 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-04-25 16:34:11,280 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-04-25 16:35:37,014 INFO] Device ID -1 [2020-04-25 16:35:37,015 INFO] Device cpu [2020-04-25 16:35:37,022 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\config.json [2020-04-25 16:35:37,024 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-04-25 16:35:37,025 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\pytorch_model.bin [2020-04-25 16:35:39,813 INFO] Loading checkpoint from models/bert_classifier/model_step_20142.pt [2020-04-25 16:35:41,855 INFO] model load success............ 
[2020-04-25 16:35:41,856 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): 
BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), 
eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): 
BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, 
elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): 
Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-04-25 16:35:41,888 INFO] * number of parameters: 102268417 [2020-04-25 16:35:41,890 INFO] Start training... [2020-04-25 16:35:42,066 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-04-25 16:35:42,067 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-04-25 16:37:39,977 INFO] Device ID -1 [2020-04-25 16:37:39,978 INFO] Device cpu [2020-04-25 16:37:39,985 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\config.json [2020-04-25 16:37:39,988 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-04-25 16:37:39,988 INFO] loading weights file 
D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\pytorch_model.bin [2020-04-25 16:37:42,926 INFO] Loading checkpoint from models/bert_classifier/model_step_20142.pt [2020-04-25 16:37:45,208 INFO] model load success............ [2020-04-25 16:37:45,208 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): 
Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): 
Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): 
Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): 
Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): 
Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-04-25 16:37:45,236 INFO] * number of parameters: 102268417 [2020-04-25 16:37:45,239 INFO] Start training... [2020-04-25 16:37:45,480 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-04-25 16:37:45,480 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-04-25 16:38:36,745 INFO] Device ID -1 [2020-04-25 16:38:36,746 INFO] Device cpu [2020-04-25 16:38:36,753 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\config.json [2020-04-25 16:38:36,755 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": 
"first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-04-25 16:38:36,756 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\pytorch_model.bin [2020-04-25 16:38:39,601 INFO] Loading checkpoint from models/bert_classifier/model_step_20142.pt [2020-04-25 16:38:41,770 INFO] model load success............ [2020-04-25 16:38:41,770 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, 
inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) 
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) 
(output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): 
LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): 
BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-04-25 16:38:41,798 INFO] * number of parameters: 102268417 [2020-04-25 16:38:41,801 INFO] Start training... [2020-04-25 16:38:41,983 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-04-25 16:38:41,984 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-04-25 17:55:21,345 INFO] Device ID -1 [2020-04-25 17:55:21,346 INFO] Device cpu [2020-04-25 17:55:21,354 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\config.json [2020-04-25 17:55:21,356 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, 
"output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-04-25 17:55:21,357 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\pytorch_model.bin [2020-04-25 17:55:53,753 INFO] Device ID -1 [2020-04-25 17:55:53,754 INFO] Device cpu [2020-04-25 17:55:53,761 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\config.json [2020-04-25 17:55:53,763 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-04-25 17:55:53,764 INFO] loading 
weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\pytorch_model.bin [2020-04-25 17:55:56,559 INFO] Loading checkpoint from models/bert_classifier/model_step_20142.pt [2020-04-25 17:56:00,274 INFO] model load success............ [2020-04-25 17:56:00,274 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( 
(dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): 
Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): 
Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): 
Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): 
Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-04-25 17:56:00,320 INFO] * number of parameters: 102268417 [2020-04-25 17:56:00,320 INFO] Start training... [2020-04-25 17:56:00,526 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-04-25 17:56:00,527 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-04-25 17:56:19,240 INFO] Device ID -1 [2020-04-25 17:56:19,241 INFO] Device cpu [2020-04-25 17:56:19,248 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\config.json [2020-04-25 17:56:19,251 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": 
"first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-04-25 17:56:19,252 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\pytorch_model.bin [2020-04-25 17:56:22,237 INFO] Loading checkpoint from models/bert_classifier/model_step_20142.pt [2020-04-25 17:56:24,408 INFO] model load success............ [2020-04-25 17:56:24,409 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, 
inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) 
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) 
(output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): 
LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): 
BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-04-25 17:56:24,439 INFO] * number of parameters: 102268417 [2020-04-25 17:56:24,440 INFO] Start training... [2020-04-25 17:56:24,633 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-04-25 17:56:24,633 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-04-25 18:06:23,155 INFO] Device ID -1 [2020-04-25 18:06:23,156 INFO] Device cpu [2020-04-25 18:06:23,165 INFO] loading configuration file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\config.json [2020-04-25 18:06:23,169 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, 
"output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-04-25 18:06:23,171 INFO] loading weights file D:\Users\JIANGXINFA895\Documents\PingAn\ժҪ\bertsum-chinese\bert-base-chinese\pytorch_model.bin [2020-04-25 18:06:26,982 INFO] Loading checkpoint from models/bert_classifier/model_step_20142.pt [2020-04-25 18:06:31,656 INFO] model load success............ [2020-04-25 18:06:31,659 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): 
BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): 
Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): 
Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, 
inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): 
Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-04-25 18:06:31,863 INFO] * number of parameters: 102268417 [2020-04-25 18:06:31,865 INFO] Start training... 
[2020-04-25 18:06:32,097 INFO] Loading train dataset from train_data\gonggao.train.sharesize_2000.12.bert.pt, number of examples: 1952 [2020-04-25 18:06:32,099 INFO] loaded:train_data\gonggao.train.sharesize_2000.12.bert.pt [2020-06-29 18:15:41,608 INFO] Device ID -1 [2020-06-29 18:15:43,128 INFO] Device cpu [2020-06-29 18:18:02,995 INFO] Device ID -1 [2020-06-29 18:18:03,165 INFO] Device cpu [2020-06-29 18:18:05,759 INFO] loading configuration file /Users/jiang/Documents/bert/bert-base-chinese/config.json [2020-06-29 18:18:05,768 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-06-29 18:18:05,778 INFO] loading weights file /Users/jiang/Documents/bert/bert-base-chinese/pytorch_model.bin [2020-06-29 18:18:46,784 INFO] Device ID -1 [2020-06-29 18:18:46,965 INFO] Device cpu [2020-06-29 18:18:49,736 INFO] loading configuration file /Users/jiang/Documents/bert/bert-base-chinese/config.json [2020-06-29 18:18:49,743 
INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-06-29 18:18:49,754 INFO] loading weights file /Users/jiang/Documents/bert/bert-base-chinese/pytorch_model.bin [2020-06-29 18:20:56,715 INFO] model load success............ 
[2020-06-29 18:21:02,439 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (2): BertLayer( (attention): BertAttention( (self): 
BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, 
bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, 
out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-06-29 18:21:03,930 INFO] * number of parameters: 102268417 [2020-06-29 18:21:05,901 
INFO] Start training... [2020-06-29 18:21:06,765 INFO] Loading train dataset from bert_data/LCSTS.train.1.bert.pt, number of examples: 14986 [2020-06-29 18:21:06,766 INFO] loaded:bert_data/LCSTS.train.1.bert.pt [2020-06-29 18:24:41,988 INFO] Device ID -1 [2020-06-29 18:24:41,989 INFO] Device cpu [2020-06-29 18:24:41,994 INFO] loading configuration file /Users/jiang/Documents/bert/bert-base-chinese/config.json [2020-06-29 18:24:41,996 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-06-29 18:24:41,998 INFO] loading weights file /Users/jiang/Documents/bert/bert-base-chinese/pytorch_model.bin [2020-06-29 18:24:45,101 INFO] model load success............ 
[2020-06-29 18:24:45,102 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (2): BertLayer( (attention): BertAttention( (self): 
BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, 
bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, 
out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-06-29 18:24:45,135 INFO] * number of parameters: 102268417 [2020-06-29 18:24:45,137 
INFO] Start training... [2020-06-29 18:25:10,079 INFO] Loading train dataset from bert_data/LCSTS.train.1.bert.pt, number of examples: 1499 [2020-06-29 18:25:10,079 INFO] loaded:bert_data/LCSTS.train.1.bert.pt [2020-06-29 18:26:15,430 INFO] Device ID -1 [2020-06-29 18:26:15,430 INFO] Device cpu [2020-06-29 18:26:15,434 INFO] loading configuration file /Users/jiang/Documents/bert/bert-base-chinese/config.json [2020-06-29 18:26:15,437 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-06-29 18:26:15,439 INFO] loading weights file /Users/jiang/Documents/bert/bert-base-chinese/pytorch_model.bin [2020-06-29 18:26:18,598 INFO] model load success............ 
[2020-06-29 18:26:18,598 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (2): BertLayer( (attention): BertAttention( (self): 
BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, 
bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, 
out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-06-29 18:26:18,626 INFO] * number of parameters: 102268417 [2020-06-29 18:26:18,628 
INFO] Start training... [2020-06-29 18:26:18,706 INFO] Loading train dataset from bert_data/LCSTS.train.1.bert.pt, number of examples: 1499 [2020-06-29 18:26:18,706 INFO] loaded:bert_data/LCSTS.train.1.bert.pt [2020-06-29 18:29:31,196 INFO] Device ID -1 [2020-06-29 18:29:31,196 INFO] Device cpu [2020-06-29 18:29:31,197 INFO] loading configuration file /Users/jiang/Documents/bert/bert-base-chinese/config.json [2020-06-29 18:29:31,198 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-06-29 18:29:31,199 INFO] loading weights file /Users/jiang/Documents/bert/bert-base-chinese/pytorch_model.bin [2020-06-29 18:29:34,063 INFO] model load success............ 
[2020-06-29 18:29:34,064 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (2): BertLayer( (attention): BertAttention( (self): 
BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, 
bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, 
out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-06-29 18:29:34,074 INFO] * number of parameters: 102268417 [2020-06-29 18:29:34,074 
INFO] Start training... [2020-06-29 18:29:34,223 INFO] Loading train dataset from bert_data/LCSTS.train.1.bert.pt, number of examples: 1499 [2020-06-29 18:29:34,223 INFO] loaded:bert_data/LCSTS.train.1.bert.pt [2020-06-29 18:32:34,053 INFO] Device ID -1 [2020-06-29 18:32:34,053 INFO] Device cpu [2020-06-29 18:32:34,057 INFO] loading configuration file /Users/jiang/Documents/bert/bert-base-chinese/config.json [2020-06-29 18:32:34,058 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-06-29 18:32:34,059 INFO] loading weights file /Users/jiang/Documents/bert/bert-base-chinese/pytorch_model.bin [2020-06-29 18:32:36,977 INFO] model load success............ 
[2020-06-29 18:32:36,978 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (2): BertLayer( (attention): BertAttention( (self): 
BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, 
bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, 
out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-06-29 18:32:36,988 INFO] * number of parameters: 102268417 [2020-06-29 18:32:36,988 
INFO] Start training... [2020-06-29 18:32:37,125 INFO] Loading train dataset from bert_data/LCSTS.train.1.bert.pt, number of examples: 1499 [2020-06-29 18:32:37,125 INFO] loaded:bert_data/LCSTS.train.1.bert.pt [2020-06-29 18:34:47,215 INFO] Device ID -1 [2020-06-29 18:34:47,215 INFO] Device cpu [2020-06-29 18:34:47,218 INFO] loading configuration file /Users/jiang/Documents/bert/bert-base-chinese/config.json [2020-06-29 18:34:47,219 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-06-29 18:34:47,220 INFO] loading weights file /Users/jiang/Documents/bert/bert-base-chinese/pytorch_model.bin [2020-06-29 18:34:50,237 INFO] model load success............ 
[2020-06-29 18:34:50,238 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (2): BertLayer( (attention): BertAttention( (self): 
BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, 
bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, 
out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-06-29 18:34:50,249 INFO] * number of parameters: 102268417 [2020-06-29 18:34:50,249 
INFO] Start training... [2020-06-29 18:34:51,237 INFO] Loading train dataset from bert_data/LCSTS.train.1.bert.pt, number of examples: 14986 [2020-06-29 18:34:51,238 INFO] loaded:bert_data/LCSTS.train.1.bert.pt [2020-06-29 19:22:52,715 INFO] Device ID -1 [2020-06-29 19:22:52,716 INFO] Device cpu [2020-06-29 19:22:52,721 INFO] loading configuration file /Users/jiang/Documents/bert/bert-base-chinese/config.json [2020-06-29 19:22:52,721 INFO] Model config BertConfig { "architectures": null, "attention_probs_dropout_prob": 0.1, "bos_token_id": null, "directionality": "bidi", "do_sample": false, "eos_token_ids": null, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 3072, "is_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-12, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 512, "model_type": "bert", "num_attention_heads": 12, "num_beams": 1, "num_hidden_layers": 12, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_token_id": null, "pooler_fc_size": 768, "pooler_num_attention_heads": 12, "pooler_num_fc_layers": 3, "pooler_size_per_head": 128, "pooler_type": "first_token_transform", "pruned_heads": {}, "repetition_penalty": 1.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "torchscript": false, "type_vocab_size": 2, "use_bfloat16": false, "vocab_size": 21128 } [2020-06-29 19:22:52,724 INFO] loading weights file /Users/jiang/Documents/bert/bert-base-chinese/pytorch_model.bin [2020-06-29 19:22:58,304 INFO] model load success............ 
[2020-06-29 19:22:58,305 INFO] Summarizer( (bert): Bert( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(21128, 768, padding_idx=0) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (2): BertLayer( (attention): BertAttention( (self): 
BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): 
BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, 
bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, 
out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (encoder): Classifier( (linear1): Linear(in_features=768, out_features=1, bias=True) (sigmoid): Sigmoid() ) ) [2020-06-29 19:22:58,316 INFO] * number of parameters: 102268417 [2020-06-29 19:22:58,317 
def do_format_to_bert(args):
    """Run the BERT-format conversion and print a timer reading around it.

    Args:
        args: Parsed command-line namespace; it is forwarded unchanged to
            ``data_builder_LAI.format_to_bert``.

    The two printed numbers are raw ``time.perf_counter()`` readings (in
    seconds); their difference is the elapsed wall-clock time of the
    conversion.
    """
    # time.clock() was deprecated in Python 3.3 and removed in 3.8, where it
    # raises AttributeError; perf_counter() is the documented replacement.
    print(time.perf_counter())
    data_builder_LAI.format_to_bert(args)
    print(time.perf_counter())
class Batch(object):
    """Padded tensor view of one minibatch of preprocessed examples.

    Each element of ``minibatch`` is indexed positionally:
    ``x[0]`` = src token ids, ``x[1]`` = labels, ``x[2]`` = segment ids,
    ``x[3]`` = clss positions and, when ``is_test`` is true, ``x[-1]`` is
    the source text.

    Attributes set for a non-empty minibatch:
        src, labels, segs: tensors right-padded with 0.
        mask: boolean tensor, True where ``src`` is not the pad id 0.
        clss: tensor right-padded with -1, then the pad slots rewritten to 0.
        mask_cls: boolean tensor, True where ``clss`` was not padding.
        src_str: list of ``x[-1]`` values (only when ``is_test``).

    ``batch_size`` is always set, so ``len(batch)`` and truthiness are safe
    even for ``Batch()`` or ``Batch([])`` (both report 0).
    """

    def _pad(self, data, pad_id, width=-1):
        """Right-pad every list in ``data`` with ``pad_id`` up to ``width``.

        When ``width`` is -1 the length of the longest list is used.
        """
        if width == -1:
            width = max(len(d) for d in data)
        return [d + [pad_id] * (width - len(d)) for d in data]

    def __init__(self, minibatch=None, device=None, is_test=False):
        # Defined up front: previously a Batch built without a minibatch had
        # no batch_size at all, so len()/bool() raised AttributeError.
        self.batch_size = 0
        if minibatch is None or len(minibatch) == 0:
            return

        data = minibatch
        self.batch_size = len(data)

        src = torch.tensor(self._pad([x[0] for x in data], 0))
        labels = torch.tensor(self._pad([x[1] for x in data], 0))
        segs = torch.tensor(self._pad([x[2] for x in data], 0))
        clss = torch.tensor(self._pad([x[3] for x in data], -1))

        # Masks are computed before the -1 padding in clss is overwritten.
        mask = src != 0
        mask_cls = clss != -1
        clss[~mask_cls] = 0

        self.clss = clss.to(device)
        self.mask_cls = mask_cls.to(device)
        self.src = src.to(device)
        self.labels = labels.to(device)
        self.segs = segs.to(device)
        self.mask = mask.to(device)

        if is_test:
            # The raw source text is kept as plain Python objects.
            self.src_str = [x[-1] for x in data]

    def __len__(self):
        """Return the number of examples in this batch (0 when empty)."""
        return self.batch_size
# Running maxima shared between successive calls of simple_batch_size_fn().
# They are initialised here so that a first call with count != 1 does not
# fail with NameError.
max_n_sents = 0
max_n_tokens = 0
max_size = 0


def simple_batch_size_fn(new, count):
    """Return the padded size of a minibatch after adding example ``new``.

    The size is ``count`` times the largest ``len(example[0])`` seen since
    the last call made with ``count == 1`` (which resets the running
    maximum).

    Args:
        new: the example just appended; only ``new[0]`` is inspected.
        count: number of examples currently in the minibatch.

    Returns:
        int: ``count * longest_first_field_so_far``.
    """
    global max_n_sents, max_n_tokens, max_size
    if count == 1:
        # A new minibatch starts: forget the previous maxima.
        max_size = 0
        max_n_sents = 0
        max_n_tokens = 0
    max_n_sents = max(max_n_sents, len(new[0]))
    max_size = max(max_size, max_n_sents)
    return count * max_size
class DataIterator(object):
    """Iterate once over a loaded dataset, yielding ``Batch`` objects."""

    def __init__(self, args, dataset, batch_size, device=None, is_test=False, shuffle=True):
        """Store the iteration settings.

        Args:
            args: options object; ``args.use_interval`` is read in ``preprocess``.
            dataset: list of example dicts, as yielded by ``load_dataset``.
            batch_size: size budget handed to ``batch_buffer`` / ``batch``.
            device: forwarded to every ``Batch``.
            is_test: when true the examples keep their ``src_txt`` field.
            shuffle: shuffle the dataset and the batch order.
        """
        self.args = args
        self.batch_size, self.is_test, self.dataset = batch_size, is_test, dataset
        self.iterations = 0
        self.device = device
        self.shuffle = shuffle
        self.sort_key = lambda x: len(x[1])

        self._iterations_this_epoch = 0

    def preprocess(self, a_example, is_test):
        """Turn an example dict into the tuple layout consumed by ``Batch``.

        Returns ``(src, labels, segs, clss)``, with ``src_txt`` appended when
        ``is_test`` is true. Labels come from ``'labels'`` when present and
        from ``'src_sent_labels'`` otherwise.
        """
        ex = a_example
        src = ex['src']
        labels = ex['labels'] if 'labels' in ex else ex['src_sent_labels']
        segs = ex['segs']
        if not self.args.use_interval:
            # Interval segments disabled: use segment id 0 everywhere.
            segs = [0] * len(segs)
        clss = ex['clss']
        if is_test:
            # src_txt is only needed (and only looked up) in test mode, so
            # training examples without it no longer raise KeyError.
            return src, labels, segs, clss, ex['src_txt']
        return src, labels, segs, clss

    def batch_buffer(self, data, batch_size):
        """Group preprocessed examples into chunks of roughly ``batch_size``.

        The size of a chunk is measured by ``simple_batch_size_fn`` (longest
        ``src`` times number of examples). Examples with an empty ``src``
        are skipped.
        """
        minibatch, size_so_far = [], 0
        for ex in data:
            if len(ex['src']) == 0:
                continue
            ex = self.preprocess(ex, self.is_test)
            if ex is None:
                continue
            minibatch.append(ex)
            size_so_far = simple_batch_size_fn(ex, len(minibatch))
            if size_so_far == batch_size:
                yield minibatch
                minibatch, size_so_far = [], 0
            elif size_so_far > batch_size:
                # Budget exceeded: emit everything but the last example and
                # start the next chunk with it. If that single example alone
                # exceeds the budget the emitted chunk is empty.
                yield minibatch[:-1]
                minibatch, size_so_far = minibatch[-1:], simple_batch_size_fn(ex, 1)
        if minibatch:
            yield minibatch

    def create_batches(self):
        """Yield minibatches (lists of example tuples) for one pass over the dataset."""
        if self.shuffle:
            # NOTE: shuffles self.dataset in place.
            random.shuffle(self.dataset)
        # Collect a large buffer (50x the batch budget), sort it by the length
        # of the clss field, then cut it into batches of the real budget.
        for buffer in self.batch_buffer(self.dataset, self.batch_size * 50):
            p_batch = sorted(buffer, key=lambda x: len(x[3]))
            p_batch = list(batch(p_batch, self.batch_size))
            if self.shuffle:
                random.shuffle(p_batch)
            for b in p_batch:
                yield b

    def __iter__(self):
        """Yield one ``Batch`` per minibatch; the iteration ends after a single pass."""
        self.batches = self.create_batches()
        for idx, minibatch in enumerate(self.batches):
            # fast-forward if loaded from state
            if self._iterations_this_epoch > idx:
                continue
            self.iterations += 1
            self._iterations_this_epoch += 1
            yield Batch(minibatch, self.device, self.is_test)
class TransformerInterEncoder(nn.Module):
    """Stack of transformer layers that scores each sentence vector in [0, 1]."""

    def __init__(self, d_model, d_ff, heads, dropout, num_inter_layers=0):
        """Create the positional table, the layer stack and the scoring head.

        Args:
            d_model: size of each sentence vector.
            d_ff: hidden size of the feed-forward sub-layers.
            heads: number of attention heads per layer.
            dropout: dropout probability.
            num_inter_layers: number of ``TransformerEncoderLayer`` blocks.
        """
        super(TransformerInterEncoder, self).__init__()
        self.d_model = d_model
        self.num_inter_layers = num_inter_layers
        self.pos_emb = PositionalEncoding(dropout, d_model)
        self.transformer_inter = nn.ModuleList(
            [TransformerEncoderLayer(d_model, heads, d_ff, dropout)
             for _ in range(num_inter_layers)])
        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
        self.wo = nn.Linear(d_model, 1, bias=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, top_vecs, mask):
        """Score every sentence position.

        Args:
            top_vecs: sentence vectors, indexed as ``[batch, n_sents, d_model]``.
            mask: ``[batch, n_sents]`` mask that is true/non-zero for real
                sentences and false/zero for padding.

        Returns:
            ``[batch, n_sents]`` scores, zeroed at padded positions.
        """
        n_sents = top_vecs.size(1)
        pos_emb = self.pos_emb.pe[:, :n_sents]
        x = top_vecs * mask[:, :, None].float()
        x = x + pos_emb

        # The layers expect the inverse mask (true = padding). `1 - mask`
        # raises for boolean tensors, so invert with `~` after coercing to
        # bool; this works for bool, uint8 and float masks alike.
        padding_mask = ~mask.bool()
        for i in range(self.num_inter_layers):
            x = self.transformer_inter[i](i, x, x, padding_mask)

        x = self.layer_norm(x)
        sent_scores = self.sigmoid(self.wo(x))
        sent_scores = sent_scores.squeeze(-1) * mask.float()
        return sent_scores
class Bert(nn.Module):
    """Thin wrapper around ``transformers.BertModel`` returning the last hidden states."""

    def __init__(self, mode_path, load_pretrained_bert, bert_config):
        """Create the underlying BERT model.

        Args:
            mode_path: value passed to ``BertModel.from_pretrained`` when
                ``load_pretrained_bert`` is true.
            load_pretrained_bert: load pretrained weights instead of building
                a fresh model from ``bert_config``.
            bert_config: configuration used when no pretrained weights are loaded.
        """
        super(Bert, self).__init__()
        if load_pretrained_bert:
            self.model = BertModel.from_pretrained(mode_path)
        else:
            self.model = BertModel(bert_config)

    def forward(self, x, segs, mask):
        """Run BERT and return its first output (the sequence output).

        Args:
            x: passed as ``input_ids``.
            segs: passed as ``token_type_ids``.
            mask: passed as ``attention_mask``.
        """
        outputs = self.model(input_ids=x, attention_mask=mask, token_type_ids=segs)
        # Index instead of tuple-unpacking: unpacking breaks as soon as the
        # model returns a different number of outputs (e.g. hidden states
        # enabled) or a ModelOutput object, whereas [0] works for both.
        return outputs[0]
class MultiHeadedAttention(nn.Module):
    """Multi-head attention from "Attention is All You Need"
    :cite:`DBLP:journals/corr/VaswaniSPUJGKP17`.

    Args:
        head_count (int): number of parallel heads
        model_dim (int): the dimension of keys/values/queries,
            must be divisible by head_count
        dropout (float): dropout applied to the attention weights
        use_final_linear (bool): apply a final ``model_dim -> model_dim``
            projection and merge the heads in the output
    """

    def __init__(self, head_count, model_dim, dropout=0.1, use_final_linear=True):
        assert model_dim % head_count == 0
        self.dim_per_head = model_dim // head_count
        self.model_dim = model_dim

        super(MultiHeadedAttention, self).__init__()
        self.head_count = head_count

        self.linear_keys = nn.Linear(model_dim, head_count * self.dim_per_head)
        self.linear_values = nn.Linear(model_dim, head_count * self.dim_per_head)
        self.linear_query = nn.Linear(model_dim, head_count * self.dim_per_head)
        self.softmax = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout(dropout)
        self.use_final_linear = use_final_linear
        if self.use_final_linear:
            self.final_linear = nn.Linear(model_dim, model_dim)

    def forward(self, key, value, query, mask=None, layer_cache=None,
                type=None, predefined_graph_1=None):
        """Compute the attention context vectors.

        Args:
            key (`FloatTensor`): key vectors `[batch, key_len, dim]`
            value (`FloatTensor`): value vectors `[batch, key_len, dim]`
            query (`FloatTensor`): query vectors `[batch, query_len, dim]`
            mask: tensor broadcastable to `[batch, query_len, key_len]`;
                positions where it is true/non-zero are filled with -1e18
                before the softmax, i.e. they receive no attention
            layer_cache (dict, optional): cache holding ``self_keys`` /
                ``self_values`` or ``memory_keys`` / ``memory_values``;
                it is updated in place
            type (str, optional): ``"self"`` or ``"context"``; only consulted
                when ``layer_cache`` is given
            predefined_graph_1: optional weights multiplied into the
                attention of the last head, which is then renormalised

        Returns:
            `[batch, query_len, dim]` when ``use_final_linear`` is set,
            otherwise the per-head context
            `[batch, head_count, query_len, dim_per_head]`.
        """
        batch_size = key.size(0)
        dim_per_head = self.dim_per_head
        head_count = self.head_count

        def shape(x):
            """Split the last dimension into heads: [batch, heads, len, dim_per_head]."""
            return x.view(batch_size, -1, head_count, dim_per_head).transpose(1, 2)

        def unshape(x):
            """Merge the heads back: [batch, len, heads * dim_per_head]."""
            return x.transpose(1, 2).contiguous() \
                .view(batch_size, -1, head_count * dim_per_head)

        # 1) Project key, value, and query.
        if layer_cache is not None:
            if type == "self":
                # Self-attention: keys and values are projections of the query input.
                query, key, value = self.linear_query(query), \
                                    self.linear_keys(query), \
                                    self.linear_values(query)
                key = shape(key)
                value = shape(value)

                # Prepend the cached keys/values, then refresh the cache.
                device = key.device
                if layer_cache["self_keys"] is not None:
                    key = torch.cat(
                        (layer_cache["self_keys"].to(device), key), dim=2)
                if layer_cache["self_values"] is not None:
                    value = torch.cat(
                        (layer_cache["self_values"].to(device), value), dim=2)
                layer_cache["self_keys"] = key
                layer_cache["self_values"] = value
            elif type == "context":
                query = self.linear_query(query)
                if layer_cache["memory_keys"] is None:
                    key, value = self.linear_keys(key), \
                                 self.linear_values(value)
                    key = shape(key)
                    value = shape(value)
                else:
                    # Reuse the projections computed on an earlier call.
                    key, value = layer_cache["memory_keys"], \
                                 layer_cache["memory_values"]
                layer_cache["memory_keys"] = key
                layer_cache["memory_values"] = value
        else:
            key = self.linear_keys(key)
            value = self.linear_values(value)
            query = self.linear_query(query)
            key = shape(key)
            value = shape(value)

        query = shape(query)

        # 2) Calculate and scale scores.
        query = query / math.sqrt(dim_per_head)
        scores = torch.matmul(query, key.transpose(2, 3))

        if mask is not None:
            mask = mask.unsqueeze(1).expand_as(scores)
            # masked_fill needs a boolean mask; coerce so uint8/float masks
            # keep working.
            scores = scores.masked_fill(mask.bool(), -1e18)

        # 3) Apply attention dropout and compute context vectors.
        attn = self.softmax(scores)

        if predefined_graph_1 is not None:
            attn_masked = attn[:, -1] * predefined_graph_1
            attn_masked = attn_masked / (torch.sum(attn_masked, 2).unsqueeze(2) + 1e-9)
            attn = torch.cat([attn[:, :-1], attn_masked.unsqueeze(1)], 1)

        drop_attn = self.dropout(attn)
        if self.use_final_linear:
            context = unshape(torch.matmul(drop_attn, value))
            return self.final_linear(context)
        return torch.matmul(drop_attn, value)
class MultipleOptimizer(object):
    """Drive several optimizers as one (needed for sparse adam)."""

    def __init__(self, op):
        """Keep the list of wrapped optimizers."""
        self.optimizers = op

    def zero_grad(self):
        """Call ``zero_grad()`` on every wrapped optimizer."""
        for optimizer in self.optimizers:
            optimizer.zero_grad()

    def step(self):
        """Call ``step()`` on every wrapped optimizer."""
        for optimizer in self.optimizers:
            optimizer.step()

    @property
    def state(self):
        """Return one dict merging the ``state`` of all wrapped optimizers."""
        merged = {}
        for optimizer in self.optimizers:
            for key, value in optimizer.state.items():
                merged[key] = value
        return merged

    def state_dict(self):
        """Return the list of ``state_dict()`` results, one per optimizer."""
        dicts = []
        for optimizer in self.optimizers:
            dicts.append(optimizer.state_dict())
        return dicts

    def load_state_dict(self, state_dicts):
        """Load ``state_dicts[i]`` into the i-th wrapped optimizer."""
        assert len(state_dicts) == len(self.optimizers)
        for optimizer, state_dict in zip(self.optimizers, state_dicts):
            optimizer.load_state_dict(state_dict)
https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer https://keras.io/optimizers/ Recently there are slightly different values used in the paper "Attention is all you need" https://arxiv.org/pdf/1706.03762.pdf, particularly the value beta2=0.98 was used there however, beta2=0.999 is still arguably the more established value, so we use that here as well """ def __init__(self, method, learning_rate, max_grad_norm, lr_decay=1, start_decay_steps=None, decay_steps=None, beta1=0.9, beta2=0.999, adagrad_accum=0.0, decay_method=None, warmup_steps=4000 ): self.last_ppl = None self.learning_rate = learning_rate self.original_lr = learning_rate self.max_grad_norm = max_grad_norm self.method = method self.lr_decay = lr_decay self.start_decay_steps = start_decay_steps self.decay_steps = decay_steps self.start_decay = False self._step = 0 self.betas = [beta1, beta2] self.adagrad_accum = adagrad_accum self.decay_method = decay_method self.warmup_steps = warmup_steps def set_parameters(self, params): """ ? 
""" self.params = [] self.sparse_params = [] for k, p in params: if p.requires_grad: if self.method != 'sparseadam' or "embed" not in k: self.params.append(p) else: self.sparse_params.append(p) if self.method == 'sgd': self.optimizer = optim.SGD(self.params, lr=self.learning_rate) elif self.method == 'adagrad': self.optimizer = optim.Adagrad(self.params, lr=self.learning_rate) for group in self.optimizer.param_groups: for p in group['params']: self.optimizer.state[p]['sum'] = self.optimizer \ .state[p]['sum'].fill_(self.adagrad_accum) elif self.method == 'adadelta': self.optimizer = optim.Adadelta(self.params, lr=self.learning_rate) elif self.method == 'adam': self.optimizer = optim.Adam(self.params, lr=self.learning_rate, betas=self.betas, eps=1e-9) elif self.method == 'sparseadam': self.optimizer = MultipleOptimizer( [optim.Adam(self.params, lr=self.learning_rate, betas=self.betas, eps=1e-8), optim.SparseAdam(self.sparse_params, lr=self.learning_rate, betas=self.betas, eps=1e-8)]) else: raise RuntimeError("Invalid optim method: " + self.method) def _set_rate(self, learning_rate): self.learning_rate = learning_rate if self.method != 'sparseadam': self.optimizer.param_groups[0]['lr'] = self.learning_rate else: for op in self.optimizer.optimizers: op.param_groups[0]['lr'] = self.learning_rate def step(self): """Update the model parameters based on current gradients. Optionally, will employ gradient modification or update learning rate. """ self._step += 1 # Decay method used in tensor2tensor. 
class LayerNormLSTMCell(nn.LSTMCell):
    """LSTM cell with layer normalisation on both gate projections and on the cell state."""

    def __init__(self, input_size, hidden_size, bias=True):
        super().__init__(input_size, hidden_size, bias)

        self.ln_ih = nn.LayerNorm(4 * hidden_size)
        self.ln_hh = nn.LayerNorm(4 * hidden_size)
        self.ln_ho = nn.LayerNorm(hidden_size)

    def forward(self, input, hidden=None):
        """Advance the cell by one step.

        Args:
            input: ``[batch, input_size]`` tensor.
            hidden: optional ``(hx, cx)`` pair of ``[batch, hidden_size]``
                tensors; zeros are used when omitted.

        Returns:
            ``(hy, cy)``: the new hidden and cell state.

        Raises:
            RuntimeError: if the input or state sizes are inconsistent.
        """
        # Validate explicitly instead of relying on the
        # check_forward_input / check_forward_hidden helpers of the base
        # class, which newer PyTorch releases no longer provide.
        if input.size(1) != self.input_size:
            raise RuntimeError(
                "input has inconsistent input_size: got {}, expected {}".format(
                    input.size(1), self.input_size))

        if hidden is None:
            hx = input.new_zeros(input.size(0), self.hidden_size, requires_grad=False)
            cx = input.new_zeros(input.size(0), self.hidden_size, requires_grad=False)
        else:
            hx, cx = hidden

        for state, label in ((hx, '[0]'), (cx, '[1]')):
            if input.size(0) != state.size(0):
                raise RuntimeError(
                    "Input batch size {} doesn't match hidden{} batch size {}".format(
                        input.size(0), label, state.size(0)))
            if state.size(1) != self.hidden_size:
                raise RuntimeError(
                    "hidden{} has inconsistent hidden_size: got {}, expected {}".format(
                        label, state.size(1), self.hidden_size))

        gates = self.ln_ih(F.linear(input, self.weight_ih, self.bias_ih)) \
                + self.ln_hh(F.linear(hx, self.weight_hh, self.bias_hh))
        # The first three chunks are the sigmoid gates, the last chunk is the
        # tanh candidate.
        i, f, o = gates[:, :(3 * self.hidden_size)].sigmoid().chunk(3, 1)
        g = gates[:, (3 * self.hidden_size):].tanh()

        cy = (f * cx) + (i * g)
        hy = o * self.ln_ho(cy).tanh()
        return hy, cy


class LayerNormLSTM(nn.Module):
    """Multi-layer, optionally bidirectional LSTM built from ``LayerNormLSTMCell``."""

    def __init__(self, input_size, hidden_size, num_layers=1, bias=True, bidirectional=False):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional

        num_directions = 2 if bidirectional else 1
        # hidden0 holds the forward-direction cells, hidden1 the backward ones.
        self.hidden0 = nn.ModuleList([
            LayerNormLSTMCell(input_size=(input_size if layer == 0 else hidden_size * num_directions),
                              hidden_size=hidden_size, bias=bias)
            for layer in range(num_layers)
        ])

        if self.bidirectional:
            self.hidden1 = nn.ModuleList([
                LayerNormLSTMCell(input_size=(input_size if layer == 0 else hidden_size * num_directions),
                                  hidden_size=hidden_size, bias=bias)
                for layer in range(num_layers)
            ])

    def forward(self, input, hidden=None):
        """Run the LSTM over a whole sequence.

        Args:
            input: ``[seq_len, batch, input_size]`` tensor (TxNxH only).
            hidden: optional ``(hx, cx)`` pair, each
                ``[num_layers * num_directions, batch, hidden_size]``.

        Returns:
            ``(y, (hy, cy))`` where ``y`` stacks the top-layer outputs per
            time step and ``hy`` / ``cy`` stack the states recorded at the
            last time step.
        """
        seq_len, batch_size, _ = input.size()  # supports TxNxH only
        num_directions = 2 if self.bidirectional else 1
        n_states = self.num_layers * num_directions
        if hidden is None:
            hx = input.new_zeros(n_states, batch_size, self.hidden_size, requires_grad=False)
            cx = input.new_zeros(n_states, batch_size, self.hidden_size, requires_grad=False)
        else:
            hx, cx = hidden

        # One independent list per time step. `[[None] * k] * seq_len` would
        # alias a single inner list, so every step would end up holding the
        # values written last.
        ht = [[None] * n_states for _ in range(seq_len)]
        ct = [[None] * n_states for _ in range(seq_len)]

        if self.bidirectional:
            xs = input
            for l, (layer0, layer1) in enumerate(zip(self.hidden0, self.hidden1)):
                l0, l1 = 2 * l, 2 * l + 1
                h0, c0, h1, c1 = hx[l0], cx[l0], hx[l1], cx[l1]
                for t, (x0, x1) in enumerate(zip(xs, reversed(xs))):
                    # Forward direction consumes step t ...
                    ht[t][l0], ct[t][l0] = layer0(x0, (h0, c0))
                    h0, c0 = ht[t][l0], ct[t][l0]
                    # ... while the backward direction consumes the mirrored step.
                    rt = seq_len - 1 - t
                    ht[rt][l1], ct[rt][l1] = layer1(x1, (h1, c1))
                    h1, c1 = ht[rt][l1], ct[rt][l1]
                # Input of the next layer: both directions concatenated per step.
                xs = [torch.cat((h[l0], h[l1]), dim=1) for h in ht]
            y = torch.stack(xs)
            hy = torch.stack(ht[-1])
            cy = torch.stack(ct[-1])
        else:
            h, c = hx, cx
            for t, x in enumerate(input):
                for l, layer in enumerate(self.hidden0):
                    ht[t][l], ct[t][l] = layer(x, (h[l], c[l]))
                    x = ht[t][l]
                h, c = ht[t], ct[t]
            y = torch.stack([h[-1] for h in ht])
            hy = torch.stack(ht[-1])
            cy = torch.stack(ct[-1])

        return y, (hy, cy)
def build_trainer(args, model, optim):
    """Create a ``Trainer`` and, when a model is given, log its parameter count.

    Args:
        args: options object; ``args.accum_count`` is used as the gradient
            accumulation count.
        model: the model to train (may be falsy, in which case nothing is logged).
        optim: the optimizer wrapper handed to the trainer.

    Returns:
        The constructed ``Trainer``.
    """
    trainer = Trainer(args, model, optim, args.accum_count)

    if model:
        logger.info('* number of parameters: %d' % utils.tally_parameters(model))

    return trainer
sent_scores = sent_scores + mask.float() sent_scores = sent_scores.cpu().data.numpy() # 从大到小 selected_ids = np.argsort(-sent_scores, 1) for i, idx in enumerate(selected_ids): _pred_idx = [] if len(batch.src_str[i]) == 0: continue for j in selected_ids[i][:len(batch.src_str[i])]: if j >= len(batch.src_str[i]): continue # candidate = batch.src_str[i][j].strip() _pred_idx.append(j) if not self.args.recall_eval and len(_pred_idx) == 3: break result_s['src'].append('[SEP]'.join(batch.src_str[i])) result_s['predict_idx'].append(utils.int_arr_to_str(_pred_idx)) label_idx = utils.label_to_idx(labels[i].tolist()) result_s['real_idx'].append(utils.int_arr_to_str(label_idx)) save_df = pd.DataFrame() save_df['real_idx'] = result_s['real_idx'] save_df['predict_idx'] = result_s['predict_idx'] save_df['src'] = result_s['src'] save_df.to_csv(save_path, sep='\t', index=False) def _gradient_accumulation(self, true_batchs): if self.grad_accum_count > 1: self.model.zero_grad() for batch in true_batchs: if self.grad_accum_count == 1: self.model.zero_grad() src = batch.src labels = batch.labels segs = batch.segs clss = batch.clss mask = batch.mask mask_cls = batch.mask_cls sent_scores, mask = self.model(src, segs, clss, mask, mask_cls) loss = self.loss(sent_scores, labels.float()) loss = (loss * mask.float()).sum() # .numel():Returns the total number of elements in the input tensor. 
(loss / loss.numel()).backward() self.optim.step() return loss def _save(self, step): model_state_dict = self.model.state_dict() checkpoint = { 'model': model_state_dict, 'opt': self.args, 'optim': self.optim, } checkpoint_path = os.path.join(self.args.model_path, 'model_step_%d.pt' % step) logger.info("Saving checkpoint %s" % checkpoint_path) if not os.path.exists(checkpoint_path): torch.save(checkpoint, checkpoint_path) return checkpoint, checkpoint_path ================================================ FILE: bertsum-chinese/src/others/__init__.py ================================================ ================================================ FILE: bertsum-chinese/src/others/logging.py ================================================ # -*- coding: utf-8 -*- from __future__ import absolute_import import logging logger = logging.getLogger() def init_logger(log_file=None, log_file_level=logging.NOTSET): log_format = logging.Formatter("[%(asctime)s %(levelname)s] %(message)s") logger = logging.getLogger() logger.setLevel(logging.INFO) console_handler = logging.StreamHandler() console_handler.setFormatter(log_format) logger.handlers = [console_handler] if log_file and log_file != '': file_handler = logging.FileHandler(log_file) file_handler.setLevel(log_file_level) file_handler.setFormatter(log_format) logger.addHandler(file_handler) return logger ================================================ FILE: bertsum-chinese/src/others/statistical.py ================================================ import pandas as pd path = '../../results/result_step_10001.csv' result_step_10001 = pd.read_csv(path, sep='\t') def apply_statis(x: pd.Series): real_idx = [int(i) for i in x['real_idx'].split(' ')] predict_idx = sorted([int(i) for i in x['predict_idx'].split(' ')]) r_in_p = 0.0 # real_idx中,在predict_idx中的数量 r_notin_p = 0.0 # real_idx中,不在predict_idx中的数量 for ri in real_idx: if ri in predict_idx: r_in_p += 1.0 else: r_notin_p += 1.0 x['r_in_p'] = r_in_p / len(real_idx) x['r_notin_p'] 
class BertData():
    """Convert a sentence-split document into BERT inputs for extractive summarisation.

    Args:
        args: Configuration object. This class reads ``bert_base_chinese``
            (tokenizer location), ``min_nsents``, ``max_nsents``,
            ``min_src_ntokens``, ``max_src_ntokens`` and
            ``max_position_embeddings`` from it.
    """

    def __init__(self, args):
        self.args = args
        # Load the Chinese vocabulary.
        self.tokenizer = BertTokenizer.from_pretrained(args.bert_base_chinese, do_lower_case=True)
        self.sep_vid = self.tokenizer.vocab['[SEP]']
        self.cls_vid = self.tokenizer.vocab['[CLS]']
        self.pad_vid = self.tokenizer.vocab['[PAD]']

    def preprocess(self, src: list, key_sents_ids: list) -> 'tuple | None':
        """Build token ids, labels, segment ids and [CLS] positions for one document.

        Args:
            src: The document as a sequence of sentences. Each sentence is
                itself a sequence whose elements are joined with spaces
                (presumably a string of characters or a list of tokens --
                confirm against the JSON producer).
            key_sents_ids: Indices into ``src`` of the sentences that belong
                to the reference summary.

        Returns:
            ``(src_subtoken_idxs, labels, segments_ids, cls_ids, src_txt)``,
            or ``None`` when the document has fewer than ``min_nsents``
            sentences or when no sentence is longer than ``min_src_ntokens``.

        Raises:
            IndexError: If an entry of ``key_sents_ids`` is out of range.
        """
        if len(src) < self.args.min_nsents:
            return None

        original_src_txt = [' '.join(s) for s in src]

        labels = [0] * len(src)
        for k_idx in key_sents_ids:
            labels[k_idx] = 1

        # Only sentences longer than min_src_ntokens are kept.
        idxs = [i for i, s in enumerate(src) if (len(s) > self.args.min_src_ntokens)]
        # Without any surviving sentence the result would be a lone
        # "[CLS] [SEP]" pair with one [CLS] position but zero labels, which
        # is useless for training and inconsistent in length; drop it.
        if not idxs:
            return None

        # Cut every kept sentence to max_src_ntokens elements.
        src = [src[i][:self.args.max_src_ntokens] for i in idxs]
        labels = [labels[i] for i in idxs]
        src = src[:self.args.max_nsents]
        labels = labels[:self.args.max_nsents]

        # Join all sentences into one text; every sentence boundary becomes
        # "[SEP] [CLS]" so each sentence gets its own [CLS] marker.
        src_txt = [' '.join(sent) for sent in src]
        text = ' [SEP] [CLS] '.join(src_txt)
        src_subtokens = self.tokenizer.tokenize(text)
        # Leave room for the leading [CLS] and the trailing [SEP].
        src_subtokens = src_subtokens[:self.args.max_position_embeddings - 2]
        src_subtokens = ['[CLS]'] + src_subtokens + ['[SEP]']

        # Map the tokens to vocabulary ids.
        src_subtoken_idxs = self.tokenizer.convert_tokens_to_ids(src_subtokens)

        # Positions of the [SEP] separators, with a virtual one at -1 so the
        # first difference below is the length of the first sentence span.
        _segs = [-1] + [i for i, t in enumerate(src_subtoken_idxs) if t == self.sep_vid]
        # Length of each span between consecutive [SEP] tokens.
        segs = [_segs[i] - _segs[i - 1] for i in range(1, len(_segs))]

        # Alternate the segment id 0/1 from one sentence span to the next.
        segments_ids = []
        for i, s in enumerate(segs):
            segments_ids += s * [i % 2]

        # Positions of the [CLS] markers, one per encoded sentence.
        cls_ids = [i for i, t in enumerate(src_subtoken_idxs) if t == self.cls_vid]
        # Sentences cut off by the length limit have no [CLS]; drop their labels.
        labels = labels[:len(cls_ids)]

        # NOTE: src_txt lists every sentence that passed the length filter,
        # including ones later dropped by max_nsents or the token limit.
        src_txt = [original_src_txt[i] for i in idxs]
        return src_subtoken_idxs, labels, segments_ids, cls_ids, src_txt
def format_to_bert(args) -> None:
    """Convert every matching JSON shard under ``args.raw_path`` to a ``.bert.pt`` file.

    For each corpus type (only ``args.dataset`` when it is non-empty,
    otherwise ``train``, ``valid`` and ``test``) the files matching
    ``*<corpus_type>*.json`` are handed to ``_format_to_bert`` through a pool
    of ``args.n_cpus`` worker processes. ``<name>.json`` is written to
    ``args.save_path`` as ``<name>.bert.pt``.

    Args:
        args: Configuration object providing ``dataset``, ``raw_path``,
            ``save_path`` and ``n_cpus``; it is also forwarded to the workers.
    """
    if args.dataset != '':
        datasets = [args.dataset]
    else:
        datasets = ['train', 'valid', 'test']

    for corpus_type in datasets:
        a_lst = []
        pts = sorted(glob.glob(pjoin(args.raw_path, '*' + corpus_type + '*.json')))
        for json_f in pts:
            # basename copes with the platform's path separators.
            real_name = os.path.basename(json_f)
            # Swap only the extension; a plain str.replace('json', ...) would
            # also rewrite any other "json" inside the file name.
            save_name = os.path.splitext(real_name)[0] + '.bert.pt'
            a_lst.append((json_f, args, pjoin(args.save_path, save_name)))
        print('a_lst:', a_lst)

        # Nothing to convert: do not spawn worker processes for no work.
        if not a_lst:
            continue

        # The workers save with torch.save, which needs the directory to exist.
        if args.save_path:
            os.makedirs(args.save_path, exist_ok=True)

        pool = Pool(args.n_cpus)
        try:
            # imap is lazy; draining it is what runs the jobs and surfaces
            # worker exceptions.
            for _ in pool.imap(_format_to_bert, a_lst):
                pass
        finally:
            # Release the workers even when a job fails.
            pool.close()
            pool.join()
def train(args, device_id):
    """Build the summarizer and optimizer, optionally resume, then train.

    Args:
        args: Run configuration. This function reads ``log_file``,
            ``visible_gpus``, ``seed``, ``batch_size``, ``train_from`` and
            ``train_steps``; the whole object is passed on to the data
            loader, model, optimizer builder and trainer.
        device_id: CUDA device index to use, or a negative value for CPU.
    """
    init_logger(args.log_file)
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    # Seed every RNG once for reproducible runs.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    def train_iter_fct():
        # Training data is reshuffled on every pass.
        return data_loader.Dataloader(args, load_dataset(args, 'train', shuffle=True), args.batch_size, device,
                                      shuffle=True, is_test=False)

    # Restore the architecture flags stored in the checkpoint BEFORE the
    # model is built; setting them afterwards cannot change the network that
    # has already been constructed from args.
    checkpoint = None
    if args.train_from != '':
        logger.info('Loading checkpoint from %s' % args.train_from)
        # NOTE(security): torch.load unpickles the file; only load
        # checkpoints from a trusted source.
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        opt = vars(checkpoint['opt'])
        for k in opt.keys():
            if k in model_flags:
                setattr(args, k, opt[k])

    model = Summarizer(args, device, load_pretrained_bert=True)
    if checkpoint is not None:
        model.load_cp(checkpoint)
    # build_optim receives the checkpoint when resuming, None otherwise.
    optim = model_builder_LAI.build_optim(args, model, checkpoint)

    logger.info('model load success............')
    logger.info(model)
    trainer = build_trainer(args, model, optim)
    trainer.train(train_iter_fct, args.train_steps)