Showing preview only (262K chars total). Download the full file or copy to clipboard to get everything.
Repository: graykode/nlp-tutorial
Branch: master
Commit: d05e31ec81d5
Files: 42
Total size: 248.5 KB
Directory structure:
gitextract_mxhf1sqm/
├── .github/
│ └── workflows/
│ └── python-app.yml
├── .gitignore
├── 1-1.NNLM/
│ ├── NNLM.ipynb
│ └── NNLM.py
├── 1-2.Word2Vec/
│ ├── Word2Vec-Skipgram(Softmax).ipynb
│ └── Word2Vec-Skipgram(Softmax).py
├── 1-3.FastText/
│ ├── FastText.ipynb
│ ├── test.txt
│ └── train.txt
├── 2-1.TextCNN/
│ ├── TextCNN.ipynb
│ └── TextCNN.py
├── 3-1.TextRNN/
│ ├── TextRNN.ipynb
│ └── TextRNN.py
├── 3-2.TextLSTM/
│ ├── TextLSTM.ipynb
│ └── TextLSTM.py
├── 3-3.Bi-LSTM/
│ ├── Bi-LSTM.ipynb
│ └── Bi-LSTM.py
├── 4-1.Seq2Seq/
│ ├── Seq2Seq.ipynb
│ └── Seq2Seq.py
├── 4-2.Seq2Seq(Attention)/
│ ├── Seq2Seq(Attention).ipynb
│ └── Seq2Seq(Attention).py
├── 4-3.Bi-LSTM(Attention)/
│ ├── Bi-LSTM(Attention).ipynb
│ └── Bi-LSTM(Attention).py
├── 5-1.Transformer/
│ ├── Transformer(Greedy_decoder).ipynb
│ ├── Transformer(Greedy_decoder).py
│ ├── Transformer.ipynb
│ └── Transformer.py
├── 5-2.BERT/
│ ├── BERT.ipynb
│ └── BERT.py
├── CONTRIBUTING.md
├── LICENSE
├── README.md
└── archive/
└── tensorflow/
└── v1/
├── 1-1.NNLM/
│ └── NNLM.py
├── 1-2.Word2Vec/
│ ├── Word2Vec-Skipgram(NCE_loss).py
│ └── Word2Vec-Skipgram(Softmax).py
├── 2-1.TextCNN/
│ └── TextCNN.py
├── 3-1.TextRNN/
│ └── TextRNN.py
├── 3-2.TextLSTM/
│ └── TextLSTM.py
├── 3-3.Bi-LSTM/
│ └── Bi-LSTM.py
├── 4-1.Seq2Seq/
│ └── Seq2Seq.py
├── 4-2.Seq2Seq(Attention)/
│ └── Seq2Seq(Attention).py
└── 4-3.Bi-LSTM(Attention)/
└── Bi-LSTM(Attention).py
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/workflows/python-app.yml
================================================
# This workflow installs py2ipynb, converts every top-level */*.py script into a
# Jupyter notebook, and commits the generated .ipynb files back to the repository.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python application

on:
  push:
    branches: [ master ]

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python 3.8
      uses: actions/setup-python@v2
      with:
        # Quoted so YAML keeps the version as a string; an unquoted value is
        # parsed as a float (e.g. 3.10 would silently become 3.1).
        python-version: '3.8'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install py2ipynb==0.0.5
    # Despite its name, this step performs the .py -> .ipynb conversion.
    - name: Test with py2ipynb
      run: |
        py2ipynb '*/*.py'
    # Push the regenerated notebooks using the workflow's own token.
    - name: Commit changes
      uses: EndBug/add-and-commit@v4
      with:
        author_name: graykode
        author_email: nlkey2022@gmail.com
        message: "Automatic convert from py to ipynb"
        add: "*/*.ipynb"
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
================================================
FILE: .gitignore
================================================
.idea
================================================
FILE: 1-1.NNLM/NNLM.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"metadata": {},
"source": [
"# code by Tae Hwan Jung @graykode\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"\n",
"def make_batch():\n",
" input_batch = []\n",
" target_batch = []\n",
"\n",
" for sen in sentences:\n",
" word = sen.split() # space tokenizer\n",
" input = [word_dict[n] for n in word[:-1]] # create (1~n-1) as input\n",
" target = word_dict[word[-1]] # create (n) as target, We usually call this 'casual language model'\n",
"\n",
" input_batch.append(input)\n",
" target_batch.append(target)\n",
"\n",
" return input_batch, target_batch\n",
"\n",
"# Model\n",
"class NNLM(nn.Module):\n",
" def __init__(self):\n",
" super(NNLM, self).__init__()\n",
" self.C = nn.Embedding(n_class, m)\n",
" self.H = nn.Linear(n_step * m, n_hidden, bias=False)\n",
" self.d = nn.Parameter(torch.ones(n_hidden))\n",
" self.U = nn.Linear(n_hidden, n_class, bias=False)\n",
" self.W = nn.Linear(n_step * m, n_class, bias=False)\n",
" self.b = nn.Parameter(torch.ones(n_class))\n",
"\n",
" def forward(self, X):\n",
" X = self.C(X) # X : [batch_size, n_step, m]\n",
" X = X.view(-1, n_step * m) # [batch_size, n_step * m]\n",
" tanh = torch.tanh(self.d + self.H(X)) # [batch_size, n_hidden]\n",
" output = self.b + self.W(X) + self.U(tanh) # [batch_size, n_class]\n",
" return output\n",
"\n",
"if __name__ == '__main__':\n",
" n_step = 2 # number of steps, n-1 in paper\n",
" n_hidden = 2 # number of hidden size, h in paper\n",
" m = 2 # embedding size, m in paper\n",
"\n",
" sentences = [\"i like dog\", \"i love coffee\", \"i hate milk\"]\n",
"\n",
" word_list = \" \".join(sentences).split()\n",
" word_list = list(set(word_list))\n",
" word_dict = {w: i for i, w in enumerate(word_list)}\n",
" number_dict = {i: w for i, w in enumerate(word_list)}\n",
" n_class = len(word_dict) # number of Vocabulary\n",
"\n",
" model = NNLM()\n",
"\n",
" criterion = nn.CrossEntropyLoss()\n",
" optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
" input_batch, target_batch = make_batch()\n",
" input_batch = torch.LongTensor(input_batch)\n",
" target_batch = torch.LongTensor(target_batch)\n",
"\n",
" # Training\n",
" for epoch in range(5000):\n",
" optimizer.zero_grad()\n",
" output = model(input_batch)\n",
"\n",
" # output : [batch_size, n_class], target_batch : [batch_size]\n",
" loss = criterion(output, target_batch)\n",
" if (epoch + 1) % 1000 == 0:\n",
" print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
"\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" # Predict\n",
" predict = model(input_batch).data.max(1, keepdim=True)[1]\n",
"\n",
" # Test\n",
" print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])"
],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: 1-1.NNLM/NNLM.py
================================================
# %%
# code by Tae Hwan Jung @graykode
import torch
import torch.nn as nn
import torch.optim as optim
def make_batch(corpus=None, vocab=None):
    """Build (context, target) index pairs for next-word prediction.

    Each sentence is split on whitespace; every word except the last becomes
    the model input and the last word becomes the prediction target.

    Args:
        corpus: Iterable of sentence strings. Defaults to the module-level
            ``sentences`` so the original zero-argument call keeps working.
        vocab: Mapping from word to integer index. Defaults to the
            module-level ``word_dict``.

    Returns:
        A tuple ``(input_batch, target_batch)``: a list of context index lists
        and a list of target indices, both in sentence order.

    Raises:
        KeyError: If a word is missing from ``vocab``.
        IndexError: If a sentence contains no words (there is no target).
    """
    corpus = sentences if corpus is None else corpus
    vocab = word_dict if vocab is None else vocab

    input_batch = []
    target_batch = []
    for sen in corpus:
        words = sen.split()  # space tokenizer
        # Words 1..n-1 are the context and word n is the target; this is
        # usually called a 'causal language model' setup.
        input_batch.append([vocab[w] for w in words[:-1]])
        target_batch.append(vocab[words[-1]])
    return input_batch, target_batch
# Model
class NNLM(nn.Module):
    """Feed-forward neural language model over a fixed-size word context.

    The forward pass computes ``b + W(x) + U(tanh(d + H(x)))`` where ``x`` is
    the concatenation of the embeddings of the context words.

    Args:
        vocab_size: Number of distinct words. Defaults to the module-level
            ``n_class``.
        embed_dim: Size of each word embedding. Defaults to the module-level
            ``m``.
        context_size: Number of context words per example. Defaults to the
            module-level ``n_step``.
        hidden_dim: Width of the hidden layer. Defaults to the module-level
            ``n_hidden``.

    The defaults keep the original ``NNLM()`` call working, while explicit
    arguments allow the class to be used without those globals being defined.
    """

    def __init__(self, vocab_size=None, embed_dim=None, context_size=None, hidden_dim=None):
        super().__init__()
        vocab_size = n_class if vocab_size is None else vocab_size
        embed_dim = m if embed_dim is None else embed_dim
        context_size = n_step if context_size is None else context_size
        hidden_dim = n_hidden if hidden_dim is None else hidden_dim

        flat_dim = context_size * embed_dim  # width of the concatenated context embeddings
        self.C = nn.Embedding(vocab_size, embed_dim)
        self.H = nn.Linear(flat_dim, hidden_dim, bias=False)
        self.d = nn.Parameter(torch.ones(hidden_dim))  # hidden bias, initialised to ones
        self.U = nn.Linear(hidden_dim, vocab_size, bias=False)
        self.W = nn.Linear(flat_dim, vocab_size, bias=False)  # direct input-to-output connection
        self.b = nn.Parameter(torch.ones(vocab_size))  # output bias, initialised to ones

    def forward(self, X):
        """Return unnormalised scores over the vocabulary.

        Args:
            X: Integer word indices, [batch_size, n_step].

        Returns:
            Tensor of logits, [batch_size, n_class].
        """
        X = self.C(X)  # X : [batch_size, n_step, m]
        # Flatten using the width H was built with, so no globals are needed here.
        X = X.view(-1, self.H.in_features)  # [batch_size, n_step * m]
        hidden = torch.tanh(self.d + self.H(X))  # [batch_size, n_hidden]
        output = self.b + self.W(X) + self.U(hidden)  # [batch_size, n_class]
        return output
if __name__ == '__main__':
    # Train the NNLM on three toy sentences and print its next-word predictions.
    n_step = 2  # number of steps, n-1 in paper
    n_hidden = 2  # number of hidden size, h in paper
    m = 2  # embedding size, m in paper

    sentences = ["i like dog", "i love coffee", "i hate milk"]

    # Sort the vocabulary so every run assigns the same index to each word;
    # iterating a bare set of strings can yield a different order per run.
    word_list = sorted(set(" ".join(sentences).split()))
    word_dict = {w: i for i, w in enumerate(word_list)}
    number_dict = {i: w for i, w in enumerate(word_list)}
    n_class = len(word_dict)  # number of Vocabulary

    model = NNLM()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    input_batch, target_batch = make_batch()
    input_batch = torch.LongTensor(input_batch)
    target_batch = torch.LongTensor(target_batch)

    # Training
    for epoch in range(5000):
        optimizer.zero_grad()
        output = model(input_batch)

        # output : [batch_size, n_class], target_batch : [batch_size]
        loss = criterion(output, target_batch)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    # Predict: no gradients are needed, and argmax over the class dimension
    # yields a 1-D tensor of indices even when the batch holds one sentence.
    with torch.no_grad():
        predict = model(input_batch).argmax(dim=1)

    # Test: show each context (every word but the last, as in make_batch)
    # next to the word the model predicts for it.
    print([sen.split()[:-1] for sen in sentences], '->', [number_dict[n.item()] for n in predict])
================================================
FILE: 1-2.Word2Vec/Word2Vec-Skipgram(Softmax).ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"metadata": {},
"source": [
"# code by Tae Hwan Jung @graykode\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"import matplotlib.pyplot as plt\n",
"\n",
"def random_batch():\n",
" random_inputs = []\n",
" random_labels = []\n",
" random_index = np.random.choice(range(len(skip_grams)), batch_size, replace=False)\n",
"\n",
" for i in random_index:\n",
" random_inputs.append(np.eye(voc_size)[skip_grams[i][0]]) # target\n",
" random_labels.append(skip_grams[i][1]) # context word\n",
"\n",
" return random_inputs, random_labels\n",
"\n",
"# Model\n",
"class Word2Vec(nn.Module):\n",
" def __init__(self):\n",
" super(Word2Vec, self).__init__()\n",
" # W and WT is not Traspose relationship\n",
" self.W = nn.Linear(voc_size, embedding_size, bias=False) # voc_size > embedding_size Weight\n",
" self.WT = nn.Linear(embedding_size, voc_size, bias=False) # embedding_size > voc_size Weight\n",
"\n",
" def forward(self, X):\n",
" # X : [batch_size, voc_size]\n",
" hidden_layer = self.W(X) # hidden_layer : [batch_size, embedding_size]\n",
" output_layer = self.WT(hidden_layer) # output_layer : [batch_size, voc_size]\n",
" return output_layer\n",
"\n",
"if __name__ == '__main__':\n",
" batch_size = 2 # mini-batch size\n",
" embedding_size = 2 # embedding size\n",
"\n",
" sentences = [\"apple banana fruit\", \"banana orange fruit\", \"orange banana fruit\",\n",
" \"dog cat animal\", \"cat monkey animal\", \"monkey dog animal\"]\n",
"\n",
" word_sequence = \" \".join(sentences).split()\n",
" word_list = \" \".join(sentences).split()\n",
" word_list = list(set(word_list))\n",
" word_dict = {w: i for i, w in enumerate(word_list)}\n",
" voc_size = len(word_list)\n",
"\n",
" # Make skip gram of one size window\n",
" skip_grams = []\n",
" for i in range(1, len(word_sequence) - 1):\n",
" target = word_dict[word_sequence[i]]\n",
" context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]\n",
" for w in context:\n",
" skip_grams.append([target, w])\n",
"\n",
" model = Word2Vec()\n",
"\n",
" criterion = nn.CrossEntropyLoss()\n",
" optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
" # Training\n",
" for epoch in range(5000):\n",
" input_batch, target_batch = random_batch()\n",
" input_batch = torch.Tensor(input_batch)\n",
" target_batch = torch.LongTensor(target_batch)\n",
"\n",
" optimizer.zero_grad()\n",
" output = model(input_batch)\n",
"\n",
" # output : [batch_size, voc_size], target_batch : [batch_size] (LongTensor, not one-hot)\n",
" loss = criterion(output, target_batch)\n",
" if (epoch + 1) % 1000 == 0:\n",
" print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
"\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" for i, label in enumerate(word_list):\n",
" W, WT = model.parameters()\n",
" x, y = W[0][i].item(), W[1][i].item()\n",
" plt.scatter(x, y)\n",
" plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')\n",
" plt.show()\n"
],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: 1-2.Word2Vec/Word2Vec-Skipgram(Softmax).py
================================================
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
def random_batch(pairs=None, size=None, vocab_size=None):
    """Sample a mini-batch of skip-gram pairs without replacement.

    Args:
        pairs: Sequence of ``[target, context]`` index pairs. Defaults to the
            module-level ``skip_grams`` so the original zero-argument call
            keeps working.
        size: Number of pairs to draw. Defaults to the module-level
            ``batch_size``.
        vocab_size: Length of each one-hot input vector. Defaults to the
            module-level ``voc_size``.

    Returns:
        A tuple ``(random_inputs, random_labels)``: a list of one-hot NumPy
        vectors encoding the target words, and a list of the matching
        context-word indices.

    Raises:
        ValueError: From ``np.random.choice`` when ``size`` exceeds the number
            of available pairs, since sampling is without replacement.
    """
    pairs = skip_grams if pairs is None else pairs
    size = batch_size if size is None else size
    vocab_size = voc_size if vocab_size is None else vocab_size

    one_hot = np.eye(vocab_size)  # built once rather than once per sampled pair
    random_index = np.random.choice(len(pairs), size, replace=False)

    random_inputs = [one_hot[pairs[i][0]] for i in random_index]  # target
    random_labels = [pairs[i][1] for i in random_index]  # context word
    return random_inputs, random_labels
# Model
class Word2Vec(nn.Module):
    """Skip-gram model with a full softmax output, built from two linear maps.

    Args:
        vocab_size: Number of distinct words. Defaults to the module-level
            ``voc_size``.
        embed_dim: Size of the embedding. Defaults to the module-level
            ``embedding_size``.

    The defaults keep the original ``Word2Vec()`` call working, while explicit
    arguments allow the class to be used without those globals being defined.
    """

    def __init__(self, vocab_size=None, embed_dim=None):
        super().__init__()
        vocab_size = voc_size if vocab_size is None else vocab_size
        embed_dim = embedding_size if embed_dim is None else embed_dim

        # W and WT are independent weights; despite the name, WT is not the
        # transpose of W.
        self.W = nn.Linear(vocab_size, embed_dim, bias=False)  # voc_size > embedding_size Weight
        self.WT = nn.Linear(embed_dim, vocab_size, bias=False)  # embedding_size > voc_size Weight

    def forward(self, X):
        """Map one-hot inputs to scores over the vocabulary.

        Args:
            X: Input vectors, [batch_size, voc_size].

        Returns:
            Tensor of logits, [batch_size, voc_size].
        """
        # X : [batch_size, voc_size]
        hidden_layer = self.W(X)  # hidden_layer : [batch_size, embedding_size]
        output_layer = self.WT(hidden_layer)  # output_layer : [batch_size, voc_size]
        return output_layer
if __name__ == '__main__':
    # Train 2-D skip-gram embeddings on a toy corpus and plot them.
    batch_size = 2  # mini-batch size
    embedding_size = 2  # embedding size

    sentences = ["apple banana fruit", "banana orange fruit", "orange banana fruit",
                 "dog cat animal", "cat monkey animal", "monkey dog animal"]

    word_sequence = " ".join(sentences).split()
    # Sort the vocabulary so every run assigns the same index to each word;
    # iterating a bare set of strings can yield a different order per run.
    word_list = sorted(set(word_sequence))
    word_dict = {w: i for i, w in enumerate(word_list)}
    voc_size = len(word_list)

    # Make skip gram of one size window
    # NOTE: word_sequence is all sentences joined together, so pairs are also
    # formed between the last word of one sentence and the first of the next.
    skip_grams = []
    for i in range(1, len(word_sequence) - 1):
        target = word_dict[word_sequence[i]]
        context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]
        for w in context:
            skip_grams.append([target, w])

    model = Word2Vec()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training
    for epoch in range(5000):
        input_batch, target_batch = random_batch()
        # Stack the one-hot rows into a single array first; building a tensor
        # directly from a list of ndarrays is slow.
        input_batch = torch.Tensor(np.array(input_batch))
        target_batch = torch.LongTensor(target_batch)

        optimizer.zero_grad()
        output = model(input_batch)

        # output : [batch_size, voc_size], target_batch : [batch_size] (LongTensor, not one-hot)
        loss = criterion(output, target_batch)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    # The input weight is [embedding_size, voc_size], so column i holds the
    # 2-D embedding of word i. Fetch it once instead of on every iteration.
    W = model.W.weight
    for i, label in enumerate(word_list):
        x, y = W[0][i].item(), W[1][i].item()
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')
    plt.show()
================================================
FILE: 1-3.FastText/FastText.ipynb
================================================
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "FastText.ipynb",
"version": "0.3.2",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"metadata": {
"id": "kg9kgMnGqYkU",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"## Install [FastText](https://fasttext.cc/docs/en/supervised-tutorial.html)"
]
},
{
"metadata": {
"id": "3Iod5UKTqZnC",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 11051
},
"outputId": "b10c85c0-c4cf-4f0b-a30e-2207ae4512b2"
},
"cell_type": "code",
"source": [
"!wget https://github.com/facebookresearch/fastText/archive/0.2.0.zip\n",
"!unzip 0.2.0.zip\n",
"%cd fastText-0.2.0\n",
"!make"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"--2019-02-02 14:43:56-- https://github.com/facebookresearch/fastText/archive/0.2.0.zip\n",
"Resolving github.com (github.com)... 140.82.118.3, 140.82.118.4\n",
"Connecting to github.com (github.com)|140.82.118.3|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://codeload.github.com/facebookresearch/fastText/zip/0.2.0 [following]\n",
"--2019-02-02 14:43:56-- https://codeload.github.com/facebookresearch/fastText/zip/0.2.0\n",
"Resolving codeload.github.com (codeload.github.com)... 192.30.253.121, 192.30.253.120\n",
"Connecting to codeload.github.com (codeload.github.com)|192.30.253.121|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: unspecified [application/zip]\n",
"Saving to: ‘0.2.0.zip’\n",
"\n",
"0.2.0.zip [ <=> ] 4.10M 6.17MB/s in 0.7s \n",
"\n",
"2019-02-02 14:43:57 (6.17 MB/s) - ‘0.2.0.zip’ saved [4304799]\n",
"\n",
"Archive: 0.2.0.zip\n",
"7842495a4d64c7a3bb4339d45d6e64321d002ed8\n",
" creating: fastText-0.2.0/\n",
" creating: fastText-0.2.0/.circleci/\n",
" inflating: fastText-0.2.0/.circleci/cmake_test.sh \n",
" inflating: fastText-0.2.0/.circleci/config.yml \n",
" inflating: fastText-0.2.0/.circleci/gcc_test.sh \n",
" inflating: fastText-0.2.0/.circleci/pip_test.sh \n",
" inflating: fastText-0.2.0/.circleci/pull_data.sh \n",
" inflating: fastText-0.2.0/.circleci/python_test.sh \n",
" inflating: fastText-0.2.0/.circleci/run_locally.sh \n",
" inflating: fastText-0.2.0/.circleci/setup_circleimg.sh \n",
" inflating: fastText-0.2.0/.circleci/setup_debian.sh \n",
" inflating: fastText-0.2.0/.gitignore \n",
" inflating: fastText-0.2.0/CMakeLists.txt \n",
" inflating: fastText-0.2.0/CONTRIBUTING.md \n",
" inflating: fastText-0.2.0/LICENSE \n",
" inflating: fastText-0.2.0/MANIFEST.in \n",
" inflating: fastText-0.2.0/Makefile \n",
" inflating: fastText-0.2.0/README.md \n",
" creating: fastText-0.2.0/alignment/\n",
" inflating: fastText-0.2.0/alignment/README.md \n",
" inflating: fastText-0.2.0/alignment/align.py \n",
" inflating: fastText-0.2.0/alignment/eval.py \n",
" inflating: fastText-0.2.0/alignment/example.sh \n",
" inflating: fastText-0.2.0/alignment/utils.py \n",
" inflating: fastText-0.2.0/classification-example.sh \n",
" inflating: fastText-0.2.0/classification-results.sh \n",
" creating: fastText-0.2.0/docs/\n",
" inflating: fastText-0.2.0/docs/aligned-vectors.md \n",
" inflating: fastText-0.2.0/docs/api.md \n",
" inflating: fastText-0.2.0/docs/cheatsheet.md \n",
" inflating: fastText-0.2.0/docs/crawl-vectors.md \n",
" inflating: fastText-0.2.0/docs/dataset.md \n",
" inflating: fastText-0.2.0/docs/english-vectors.md \n",
" inflating: fastText-0.2.0/docs/faqs.md \n",
" inflating: fastText-0.2.0/docs/language-identification.md \n",
" inflating: fastText-0.2.0/docs/options.md \n",
" inflating: fastText-0.2.0/docs/pretrained-vectors.md \n",
" inflating: fastText-0.2.0/docs/references.md \n",
" inflating: fastText-0.2.0/docs/supervised-models.md \n",
" inflating: fastText-0.2.0/docs/supervised-tutorial.md \n",
" inflating: fastText-0.2.0/docs/support.md \n",
" inflating: fastText-0.2.0/docs/unsupervised-tutorials.md \n",
" inflating: fastText-0.2.0/eval.py \n",
" inflating: fastText-0.2.0/get-wikimedia.sh \n",
" inflating: fastText-0.2.0/pretrained-vectors.md \n",
" creating: fastText-0.2.0/python/\n",
" inflating: fastText-0.2.0/python/README.md \n",
" inflating: fastText-0.2.0/python/README.rst \n",
" creating: fastText-0.2.0/python/benchmarks/\n",
" inflating: fastText-0.2.0/python/benchmarks/README.rst \n",
" inflating: fastText-0.2.0/python/benchmarks/get_word_vector.py \n",
" creating: fastText-0.2.0/python/doc/\n",
" creating: fastText-0.2.0/python/doc/examples/\n",
" inflating: fastText-0.2.0/python/doc/examples/FastTextEmbeddingBag.py \n",
" inflating: fastText-0.2.0/python/doc/examples/bin_to_vec.py \n",
" inflating: fastText-0.2.0/python/doc/examples/compute_accuracy.py \n",
" inflating: fastText-0.2.0/python/doc/examples/get_vocab.py \n",
" inflating: fastText-0.2.0/python/doc/examples/train_supervised.py \n",
" inflating: fastText-0.2.0/python/doc/examples/train_unsupervised.py \n",
" creating: fastText-0.2.0/python/fastText/\n",
" inflating: fastText-0.2.0/python/fastText/FastText.py \n",
" inflating: fastText-0.2.0/python/fastText/__init__.py \n",
" creating: fastText-0.2.0/python/fastText/pybind/\n",
" inflating: fastText-0.2.0/python/fastText/pybind/fasttext_pybind.cc \n",
" creating: fastText-0.2.0/python/fastText/tests/\n",
" inflating: fastText-0.2.0/python/fastText/tests/__init__.py \n",
" inflating: fastText-0.2.0/python/fastText/tests/test_configurations.py \n",
" inflating: fastText-0.2.0/python/fastText/tests/test_script.py \n",
" creating: fastText-0.2.0/python/fastText/util/\n",
" inflating: fastText-0.2.0/python/fastText/util/__init__.py \n",
" inflating: fastText-0.2.0/python/fastText/util/util.py \n",
" inflating: fastText-0.2.0/quantization-example.sh \n",
" inflating: fastText-0.2.0/runtests.py \n",
" creating: fastText-0.2.0/scripts/\n",
" creating: fastText-0.2.0/scripts/kbcompletion/\n",
" inflating: fastText-0.2.0/scripts/kbcompletion/README.md \n",
" inflating: fastText-0.2.0/scripts/kbcompletion/data.sh \n",
" inflating: fastText-0.2.0/scripts/kbcompletion/eval.cpp \n",
" inflating: fastText-0.2.0/scripts/kbcompletion/fb15k.sh \n",
" inflating: fastText-0.2.0/scripts/kbcompletion/fb15k237.sh \n",
" inflating: fastText-0.2.0/scripts/kbcompletion/svo.sh \n",
" inflating: fastText-0.2.0/scripts/kbcompletion/wn18.sh \n",
" creating: fastText-0.2.0/scripts/quantization/\n",
" inflating: fastText-0.2.0/scripts/quantization/quantization-results.sh \n",
" extracting: fastText-0.2.0/setup.cfg \n",
" inflating: fastText-0.2.0/setup.py \n",
" creating: fastText-0.2.0/src/\n",
" inflating: fastText-0.2.0/src/args.cc \n",
" inflating: fastText-0.2.0/src/args.h \n",
" inflating: fastText-0.2.0/src/dictionary.cc \n",
" inflating: fastText-0.2.0/src/dictionary.h \n",
" inflating: fastText-0.2.0/src/fasttext.cc \n",
" inflating: fastText-0.2.0/src/fasttext.h \n",
" inflating: fastText-0.2.0/src/main.cc \n",
" inflating: fastText-0.2.0/src/matrix.cc \n",
" inflating: fastText-0.2.0/src/matrix.h \n",
" inflating: fastText-0.2.0/src/meter.cc \n",
" inflating: fastText-0.2.0/src/meter.h \n",
" inflating: fastText-0.2.0/src/model.cc \n",
" inflating: fastText-0.2.0/src/model.h \n",
" inflating: fastText-0.2.0/src/productquantizer.cc \n",
" inflating: fastText-0.2.0/src/productquantizer.h \n",
" inflating: fastText-0.2.0/src/qmatrix.cc \n",
" inflating: fastText-0.2.0/src/qmatrix.h \n",
" inflating: fastText-0.2.0/src/real.h \n",
" inflating: fastText-0.2.0/src/utils.cc \n",
" inflating: fastText-0.2.0/src/utils.h \n",
" inflating: fastText-0.2.0/src/vector.cc \n",
" inflating: fastText-0.2.0/src/vector.h \n",
" creating: fastText-0.2.0/tests/\n",
" inflating: fastText-0.2.0/tests/fetch_test_data.sh \n",
" creating: fastText-0.2.0/website/\n",
" inflating: fastText-0.2.0/website/README.md \n",
" creating: fastText-0.2.0/website/blog/\n",
" inflating: fastText-0.2.0/website/blog/2016-08-18-blog-post.md \n",
" inflating: fastText-0.2.0/website/blog/2017-05-02-blog-post.md \n",
" inflating: fastText-0.2.0/website/blog/2017-10-02-blog-post.md \n",
" creating: fastText-0.2.0/website/core/\n",
" inflating: fastText-0.2.0/website/core/Footer.js \n",
" inflating: fastText-0.2.0/website/package.json \n",
" creating: fastText-0.2.0/website/pages/\n",
" creating: fastText-0.2.0/website/pages/en/\n",
" inflating: fastText-0.2.0/website/pages/en/index.js \n",
" inflating: fastText-0.2.0/website/sidebars.json \n",
" inflating: fastText-0.2.0/website/siteConfig.js \n",
" creating: fastText-0.2.0/website/static/\n",
" creating: fastText-0.2.0/website/static/docs/\n",
" creating: fastText-0.2.0/website/static/docs/en/\n",
" creating: fastText-0.2.0/website/static/docs/en/html/\n",
" extracting: fastText-0.2.0/website/static/docs/en/html/.classfasttext_1_1QMatrix-members.html.i4eKqy \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/annotated.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/annotated_dup.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/args_8cc.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/args_8h.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/args_8h.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/args_8h_source.html \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/bc_s.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/bdwn.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classes.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Args-members.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Args.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Args.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Dictionary.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Dictionary.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1FastText-members.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1FastText.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1FastText.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Matrix-members.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Matrix.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Matrix.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Model-members.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Model.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Model.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1QMatrix.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1QMatrix.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Vector-members.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Vector.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Vector.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/closed.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/dictionary_8cc.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/dictionary_8h.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/dictionary_8h.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/dictionary_8h_source.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/doc.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/doxygen.css \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/doxygen.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/dynsections.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/fasttext_8cc.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/fasttext_8h.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/fasttext_8h.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/fasttext_8h_source.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/favicon.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/files.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/files.js \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/folderclosed.png \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/folderopen.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_0x7e.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_b.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_c.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_d.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_dup.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_e.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_f.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_func.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_g.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_h.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_i.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_k.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_l.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_m.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_n.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_o.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_p.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_q.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_r.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_s.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_t.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_u.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_v.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_vars.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_w.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/functions_z.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/globals.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/globals_defs.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/globals_func.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/index.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/jquery.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/main_8cc.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/main_8cc.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/matrix_8cc.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/matrix_8h.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/matrix_8h_source.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/menu.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/menudata.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/model_8cc.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/model_8h.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/model_8h.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/model_8h_source.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/namespacefasttext.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/namespacefasttext.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/namespacefasttext_1_1utils.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/namespacemembers.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/namespacemembers_enum.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/namespacemembers_func.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/namespacemembers_type.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/namespaces.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/namespaces.js \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/nav_f.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/nav_g.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/nav_h.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/navtree.css \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/navtree.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/navtreedata.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/navtreeindex0.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/navtreeindex1.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/open.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/productquantizer_8cc.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/productquantizer_8cc.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/productquantizer_8h.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/productquantizer_8h_source.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/qmatrix_8cc.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/qmatrix_8h.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/qmatrix_8h_source.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/real_8h.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/real_8h.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/real_8h_source.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/resize.js \n",
" creating: fastText-0.2.0/website/static/docs/en/html/search/\n",
" extracting: fastText-0.2.0/website/static/docs/en/html/search/.files_7.html.StRRNc \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/search/.variables_a.html.1MGQ27 \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_0.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_0.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_1.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_1.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_10.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_10.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_11.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_11.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_12.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_12.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_13.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_13.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_14.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_14.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_15.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_15.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_16.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_16.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_17.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_17.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_2.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_2.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_3.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_3.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_4.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_4.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_5.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_5.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_6.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_6.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_7.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_7.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_8.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_8.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_9.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_9.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_a.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_a.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_b.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_b.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_c.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_c.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_d.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_d.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_e.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_e.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_f.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/all_f.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_0.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_0.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_1.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_1.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_2.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_2.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_3.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_3.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_4.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_4.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_5.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_5.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_6.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_6.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_7.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_7.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_8.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_8.js \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/search/close.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_0.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_0.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_1.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_1.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_2.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_2.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_3.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_3.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_0.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_0.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_1.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_1.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_2.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_2.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_0.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_0.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_1.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_1.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_2.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_2.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_3.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_3.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_4.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_4.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_5.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_5.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_0.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_0.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_1.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_1.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_2.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_2.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_3.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_3.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_4.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_4.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_5.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_5.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_6.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_6.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_7.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_7.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_8.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/files_8.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_0.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_0.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_1.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_1.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_10.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_10.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_11.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_11.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_12.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_12.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_13.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_13.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_14.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_14.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_15.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_15.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_16.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_16.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_17.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_17.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_2.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_2.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_3.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_3.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_4.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_4.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_5.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_5.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_6.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_6.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_7.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_7.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_8.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_8.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_9.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_9.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_a.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_a.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_b.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_b.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_c.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_c.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_d.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_d.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_e.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_e.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_f.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_f.js \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/search/mag_sel.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/namespaces_0.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/namespaces_0.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/nomatches.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/search.css \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/search.js \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/search/search_l.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/search_m.png \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/search/search_r.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/searchdata.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/typedefs_0.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/typedefs_0.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/typedefs_1.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/typedefs_1.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_0.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_0.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_1.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_1.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_10.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_10.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_11.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_11.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_12.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_12.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_13.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_13.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_2.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_2.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_3.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_3.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_4.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_4.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_5.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_5.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_6.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_6.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_7.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_7.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_8.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_8.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_9.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_9.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_a.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_a.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_b.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_b.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_c.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_c.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_d.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_d.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_e.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_e.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_f.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_f.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/splitbar.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1Node-members.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1Node.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1Node.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1entry-members.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1entry.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1entry.js \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/sync_off.png \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/sync_on.png \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/tab_a.png \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/tab_b.png \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/tab_h.png \n",
" extracting: fastText-0.2.0/website/static/docs/en/html/tab_s.png \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/tabs.css \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/utils_8cc.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/utils_8cc.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/utils_8h.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/utils_8h.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/utils_8h_source.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/vector_8cc.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/vector_8cc.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/vector_8h.html \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/vector_8h.js \n",
" inflating: fastText-0.2.0/website/static/docs/en/html/vector_8h_source.html \n",
" inflating: fastText-0.2.0/website/static/fasttext.css \n",
" creating: fastText-0.2.0/website/static/img/\n",
" creating: fastText-0.2.0/website/static/img/authors/\n",
" inflating: fastText-0.2.0/website/static/img/authors/armand_joulin.jpg \n",
" inflating: fastText-0.2.0/website/static/img/authors/christian_puhrsch.png \n",
" inflating: fastText-0.2.0/website/static/img/authors/edouard_grave.jpeg \n",
" inflating: fastText-0.2.0/website/static/img/authors/piotr_bojanowski.jpg \n",
" inflating: fastText-0.2.0/website/static/img/authors/tomas_mikolov.jpg \n",
" creating: fastText-0.2.0/website/static/img/blog/\n",
" inflating: fastText-0.2.0/website/static/img/blog/2016-08-18-blog-post-img1.png \n",
" inflating: fastText-0.2.0/website/static/img/blog/2016-08-18-blog-post-img2.png \n",
" inflating: fastText-0.2.0/website/static/img/blog/2017-05-02-blog-post-img1.jpg \n",
" inflating: fastText-0.2.0/website/static/img/blog/2017-05-02-blog-post-img2.jpg \n",
" inflating: fastText-0.2.0/website/static/img/blog/2017-10-02-blog-post-img1.png \n",
" inflating: fastText-0.2.0/website/static/img/cbo_vs_skipgram.png \n",
" inflating: fastText-0.2.0/website/static/img/fasttext-icon-api.png \n",
" inflating: fastText-0.2.0/website/static/img/fasttext-icon-bg-web.png \n",
" inflating: fastText-0.2.0/website/static/img/fasttext-icon-color-square.png \n",
" inflating: fastText-0.2.0/website/static/img/fasttext-icon-color-web.png \n",
" inflating: fastText-0.2.0/website/static/img/fasttext-icon-faq.png \n",
" inflating: fastText-0.2.0/website/static/img/fasttext-icon-tutorial.png \n",
" inflating: fastText-0.2.0/website/static/img/fasttext-icon-white-web.png \n",
" inflating: fastText-0.2.0/website/static/img/fasttext-logo-color-web.png \n",
" inflating: fastText-0.2.0/website/static/img/fasttext-logo-white-web.png \n",
" inflating: fastText-0.2.0/website/static/img/logo-color.png \n",
" inflating: fastText-0.2.0/website/static/img/model-black.png \n",
" inflating: fastText-0.2.0/website/static/img/model-blue.png \n",
" inflating: fastText-0.2.0/website/static/img/model-red.png \n",
" inflating: fastText-0.2.0/website/static/img/ogimage.png \n",
" inflating: fastText-0.2.0/website/static/img/oss_logo.png \n",
" inflating: fastText-0.2.0/wikifil.pl \n",
" inflating: fastText-0.2.0/word-vector-example.sh \n",
"/content/fastText-0.2.0\n",
"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/args.cc\n",
"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/dictionary.cc\n",
"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/productquantizer.cc\n",
"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/matrix.cc\n",
"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/qmatrix.cc\n",
"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/vector.cc\n",
"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/model.cc\n",
"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/utils.cc\n",
"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/meter.cc\n",
"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/fasttext.cc\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:\u001b[m\u001b[K In member function ‘\u001b[01m\u001b[Kvoid fasttext::FastText::quantize(const fasttext::Args&)\u001b[m\u001b[K’:\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:302:45:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[Kstd::vector<int> fasttext::FastText::selectEmbeddings(int32_t) const\u001b[m\u001b[K’ is deprecated: selectEmbeddings is being deprecated. [\u001b[01;35m\u001b[K-Wdeprecated-declarations\u001b[m\u001b[K]\n",
" auto idx = selectEmbeddings(qargs.cutoff\u001b[01;35m\u001b[K)\u001b[m\u001b[K;\n",
" \u001b[01;35m\u001b[K^\u001b[m\u001b[K\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:279:22:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n",
" std::vector<int32_t> \u001b[01;36m\u001b[KFastText\u001b[m\u001b[K::selectEmbeddings(int32_t cutoff) const {\n",
" \u001b[01;36m\u001b[K^~~~~~~~\u001b[m\u001b[K\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:\u001b[m\u001b[K In member function ‘\u001b[01m\u001b[Kvoid fasttext::FastText::lazyComputeWordVectors()\u001b[m\u001b[K’:\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:531:40:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[Kvoid fasttext::FastText::precomputeWordVectors(fasttext::Matrix&)\u001b[m\u001b[K’ is deprecated: precomputeWordVectors is being deprecated. [\u001b[01;35m\u001b[K-Wdeprecated-declarations\u001b[m\u001b[K]\n",
" precomputeWordVectors(*wordVectors_\u001b[01;35m\u001b[K)\u001b[m\u001b[K;\n",
" \u001b[01;35m\u001b[K^\u001b[m\u001b[K\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:514:6:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n",
" void \u001b[01;36m\u001b[KFastText\u001b[m\u001b[K::precomputeWordVectors(Matrix& wordVectors) {\n",
" \u001b[01;36m\u001b[K^~~~~~~~\u001b[m\u001b[K\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:\u001b[m\u001b[K In member function ‘\u001b[01m\u001b[Kvoid fasttext::FastText::trainThread(int32_t)\u001b[m\u001b[K’:\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:650:41:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[Kvoid fasttext::FastText::supervised(fasttext::Model&, fasttext::real, const std::vector<int>&, const std::vector<int>&)\u001b[m\u001b[K’ is deprecated: supervised is being deprecated. [\u001b[01;35m\u001b[K-Wdeprecated-declarations\u001b[m\u001b[K]\n",
" supervised(model, lr, line, labels\u001b[01;35m\u001b[K)\u001b[m\u001b[K;\n",
" \u001b[01;35m\u001b[K^\u001b[m\u001b[K\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:338:6:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n",
" void \u001b[01;36m\u001b[KFastText\u001b[m\u001b[K::supervised(\n",
" \u001b[01;36m\u001b[K^~~~~~~~\u001b[m\u001b[K\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:653:27:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[Kvoid fasttext::FastText::cbow(fasttext::Model&, fasttext::real, const std::vector<int>&)\u001b[m\u001b[K’ is deprecated: cbow is being deprecated. [\u001b[01;35m\u001b[K-Wdeprecated-declarations\u001b[m\u001b[K]\n",
" cbow(model, lr, line\u001b[01;35m\u001b[K)\u001b[m\u001b[K;\n",
" \u001b[01;35m\u001b[K^\u001b[m\u001b[K\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:355:6:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n",
" void \u001b[01;36m\u001b[KFastText\u001b[m\u001b[K::cbow(Model& model, real lr, const std::vector<int32_t>& line) {\n",
" \u001b[01;36m\u001b[K^~~~~~~~\u001b[m\u001b[K\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:656:31:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[Kvoid fasttext::FastText::skipgram(fasttext::Model&, fasttext::real, const std::vector<int>&)\u001b[m\u001b[K’ is deprecated: skipgram is being deprecated. [\u001b[01;35m\u001b[K-Wdeprecated-declarations\u001b[m\u001b[K]\n",
" skipgram(model, lr, line\u001b[01;35m\u001b[K)\u001b[m\u001b[K;\n",
" \u001b[01;35m\u001b[K^\u001b[m\u001b[K\n",
"\u001b[01m\u001b[Ksrc/fasttext.cc:371:6:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n",
" void \u001b[01;36m\u001b[KFastText\u001b[m\u001b[K::skipgram(\n",
" \u001b[01;36m\u001b[K^~~~~~~~\u001b[m\u001b[K\n",
"c++ -pthread -std=c++0x -march=native -O3 -funroll-loops args.o dictionary.o productquantizer.o matrix.o qmatrix.o vector.o model.o utils.o meter.o fasttext.o src/main.cc -o fasttext\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "5JauDviyqqL-",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"## Make simple dataset"
]
},
{
"metadata": {
"id": "ALMQ3gjFqqZS",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# 1 is positive, 0 is negative\n",
"f = open('train.txt', 'w')\n",
"f.write('__label__1 i love you\\n')\n",
"f.write('__label__1 he loves me\\n')\n",
"f.write('__label__1 she likes baseball\\n')\n",
"f.write('__label__0 i hate you\\n')\n",
"f.write('__label__0 sorry for that\\n')\n",
"f.write('__label__0 this is awful')\n",
"f.close()\n",
"\n",
"f = open('test.txt', 'w')\n",
"f.write('sorry hate you')\n",
"f.close()"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "i3_PpexwsN_a",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"## Training"
]
},
{
"metadata": {
"id": "q06m76JusOQ8",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 92
},
"outputId": "4ed3502d-4aec-4d06-cb02-b8392978ce14"
},
"cell_type": "code",
"source": [
"!./fasttext supervised -input train.txt -output model -dim 2"
],
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"text": [
"\rRead 0M words\n",
"Number of words: 17\n",
"Number of labels: 2\n",
"\rProgress: 100.0% words/sec/thread: 17608 lr: 0.000000 loss: 0.672308 ETA: 0h 0m\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "C77MXO-GsOpi",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"## Predict"
]
},
{
"metadata": {
"id": "y1yDPCjVsO6x",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 36
},
"outputId": "8963d7bd-01c8-40b9-e1ee-1446cb1b3454"
},
"cell_type": "code",
"source": [
"!cat test.txt\n",
"!./fasttext predict model.bin test.txt"
],
"execution_count": 22,
"outputs": [
{
"output_type": "stream",
"text": [
"sorry hate you__label__0\n"
],
"name": "stdout"
}
]
}
]
}
================================================
FILE: 1-3.FastText/test.txt
================================================
sorry hate you
================================================
FILE: 1-3.FastText/train.txt
================================================
__label__1 i love you
__label__1 he loves me
__label__1 she likes baseball
__label__0 i hate you
__label__0 sorry for that
__label__0 this is awful
================================================
FILE: 2-1.TextCNN/TextCNN.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"metadata": {},
"source": [
"# code by Tae Hwan Jung @graykode\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"import torch.nn.functional as F\n",
"\n",
"class TextCNN(nn.Module):\n",
" def __init__(self):\n",
" super(TextCNN, self).__init__()\n",
" self.num_filters_total = num_filters * len(filter_sizes)\n",
" self.W = nn.Embedding(vocab_size, embedding_size)\n",
" self.Weight = nn.Linear(self.num_filters_total, num_classes, bias=False)\n",
" self.Bias = nn.Parameter(torch.ones([num_classes]))\n",
" self.filter_list = nn.ModuleList([nn.Conv2d(1, num_filters, (size, embedding_size)) for size in filter_sizes])\n",
"\n",
" def forward(self, X):\n",
" embedded_chars = self.W(X) # [batch_size, sequence_length, sequence_length]\n",
" embedded_chars = embedded_chars.unsqueeze(1) # add channel(=1) [batch, channel(=1), sequence_length, embedding_size]\n",
"\n",
" pooled_outputs = []\n",
" for i, conv in enumerate(self.filter_list):\n",
" # conv : [input_channel(=1), output_channel(=3), (filter_height, filter_width), bias_option]\n",
" h = F.relu(conv(embedded_chars))\n",
" # mp : ((filter_height, filter_width))\n",
" mp = nn.MaxPool2d((sequence_length - filter_sizes[i] + 1, 1))\n",
" # pooled : [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3)]\n",
" pooled = mp(h).permute(0, 3, 2, 1)\n",
" pooled_outputs.append(pooled)\n",
"\n",
" h_pool = torch.cat(pooled_outputs, len(filter_sizes)) # [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3) * 3]\n",
" h_pool_flat = torch.reshape(h_pool, [-1, self.num_filters_total]) # [batch_size(=6), output_height * output_width * (output_channel * 3)]\n",
" model = self.Weight(h_pool_flat) + self.Bias # [batch_size, num_classes]\n",
" return model\n",
"\n",
"if __name__ == '__main__':\n",
" embedding_size = 2 # embedding size\n",
" sequence_length = 3 # sequence length\n",
" num_classes = 2 # number of classes\n",
" filter_sizes = [2, 2, 2] # n-gram windows\n",
" num_filters = 3 # number of filters\n",
"\n",
" # 3 words sentences (=sequence_length is 3)\n",
" sentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \"i hate you\", \"sorry for that\", \"this is awful\"]\n",
" labels = [1, 1, 1, 0, 0, 0] # 1 is good, 0 is not good.\n",
"\n",
" word_list = \" \".join(sentences).split()\n",
" word_list = list(set(word_list))\n",
" word_dict = {w: i for i, w in enumerate(word_list)}\n",
" vocab_size = len(word_dict)\n",
"\n",
" model = TextCNN()\n",
"\n",
" criterion = nn.CrossEntropyLoss()\n",
" optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
" inputs = torch.LongTensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences])\n",
" targets = torch.LongTensor([out for out in labels]) # To using Torch Softmax Loss function\n",
"\n",
" # Training\n",
" for epoch in range(5000):\n",
" optimizer.zero_grad()\n",
" output = model(inputs)\n",
"\n",
" # output : [batch_size, num_classes], target_batch : [batch_size] (LongTensor, not one-hot)\n",
" loss = criterion(output, targets)\n",
" if (epoch + 1) % 1000 == 0:\n",
" print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
"\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" # Test\n",
" test_text = 'sorry hate you'\n",
" tests = [np.asarray([word_dict[n] for n in test_text.split()])]\n",
" test_batch = torch.LongTensor(tests)\n",
"\n",
" # Predict\n",
" predict = model(test_batch).data.max(1, keepdim=True)[1]\n",
" if predict[0][0] == 0:\n",
" print(test_text,\"is Bad Mean...\")\n",
" else:\n",
" print(test_text,\"is Good Mean!!\")"
],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: 2-1.TextCNN/TextCNN.py
================================================
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
class TextCNN(nn.Module):
    """Convolutional sentence classifier: embedding -> conv filters -> max-over-time -> linear.

    Hyper-parameters are read from module-level globals when the model is
    constructed: ``vocab_size``, ``embedding_size``, ``num_filters``,
    ``filter_sizes`` and ``num_classes``.
    """

    def __init__(self):
        super(TextCNN, self).__init__()
        self.num_filters_total = num_filters * len(filter_sizes)
        self.W = nn.Embedding(vocab_size, embedding_size)
        self.Weight = nn.Linear(self.num_filters_total, num_classes, bias=False)
        self.Bias = nn.Parameter(torch.ones([num_classes]))
        # One Conv2d per window size; each kernel spans the full embedding width.
        self.filter_list = nn.ModuleList([nn.Conv2d(1, num_filters, (size, embedding_size)) for size in filter_sizes])

    def forward(self, X):
        """Return class logits of shape [batch_size, num_classes].

        Args:
            X: LongTensor of word ids, [batch_size, sequence_length].
        """
        embedded_chars = self.W(X)  # [batch_size, sequence_length, embedding_size]
        embedded_chars = embedded_chars.unsqueeze(1)  # add channel(=1) [batch, channel(=1), sequence_length, embedding_size]

        pooled_outputs = []
        for conv in self.filter_list:
            # h : [batch_size, num_filters, sequence_length - filter_height + 1, 1]
            h = F.relu(conv(embedded_chars))
            # Max-over-time pooling. The kernel height is taken from the feature map
            # itself, so the result does not depend on a global sequence length.
            pooled = F.max_pool2d(h, (h.size(2), 1))  # [batch_size, num_filters, 1, 1]
            # pooled : [batch_size, output_height(=1), output_width(=1), num_filters]
            pooled_outputs.append(pooled.permute(0, 3, 2, 1))

        # Concatenate along the channel axis (dim 3). The axis must not depend on
        # how many filter sizes there are.
        h_pool = torch.cat(pooled_outputs, dim=3)  # [batch_size, 1, 1, num_filters * len(filter_sizes)]
        h_pool_flat = torch.reshape(h_pool, [-1, self.num_filters_total])  # [batch_size, num_filters_total]
        model = self.Weight(h_pool_flat) + self.Bias  # [batch_size, num_classes]
        return model
if __name__ == '__main__':
    # Hyper-parameters are module-level names because TextCNN reads them as globals.
    embedding_size = 2  # embedding size
    sequence_length = 3  # sequence length
    num_classes = 2  # number of classes
    filter_sizes = [2, 2, 2]  # n-gram windows
    num_filters = 3  # number of filters

    # 3 words sentences (=sequence_length is 3)
    sentences = ["i love you", "he loves me", "she likes baseball", "i hate you", "sorry for that", "this is awful"]
    labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.

    # Sorted so word ids are identical on every run (set iteration order is not stable across runs).
    word_list = sorted(set(" ".join(sentences).split()))
    word_dict = {w: i for i, w in enumerate(word_list)}
    vocab_size = len(word_dict)

    model = TextCNN()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Plain nested lists of ids: avoids building a tensor from a list of numpy arrays.
    inputs = torch.LongTensor([[word_dict[n] for n in sen.split()] for sen in sentences])
    targets = torch.LongTensor(labels)  # class indices, as CrossEntropyLoss expects

    # Training
    for epoch in range(5000):
        optimizer.zero_grad()
        output = model(inputs)

        # output : [batch_size, num_classes], targets : [batch_size] (LongTensor, not one-hot)
        loss = criterion(output, targets)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    # Test
    test_text = 'sorry hate you'
    test_batch = torch.LongTensor([[word_dict[n] for n in test_text.split()]])

    # Predict (no gradient tracking needed for inference)
    with torch.no_grad():
        predict = model(test_batch).argmax(dim=1)
    if predict[0].item() == 0:
        print(test_text,"is Bad Mean...")
    else:
        print(test_text,"is Good Mean!!")
================================================
FILE: 3-1.TextRNN/TextRNN.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"metadata": {},
"source": [
"# code by Tae Hwan Jung @graykode\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"\n",
"def make_batch():\n",
" input_batch = []\n",
" target_batch = []\n",
"\n",
" for sen in sentences:\n",
" word = sen.split() # space tokenizer\n",
" input = [word_dict[n] for n in word[:-1]] # create (1~n-1) as input\n",
" target = word_dict[word[-1]] # create (n) as target, We usually call this 'casual language model'\n",
"\n",
" input_batch.append(np.eye(n_class)[input])\n",
" target_batch.append(target)\n",
"\n",
" return input_batch, target_batch\n",
"\n",
"class TextRNN(nn.Module):\n",
" def __init__(self):\n",
" super(TextRNN, self).__init__()\n",
" self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)\n",
" self.W = nn.Linear(n_hidden, n_class, bias=False)\n",
" self.b = nn.Parameter(torch.ones([n_class]))\n",
"\n",
" def forward(self, hidden, X):\n",
" X = X.transpose(0, 1) # X : [n_step, batch_size, n_class]\n",
" outputs, hidden = self.rnn(X, hidden)\n",
" # outputs : [n_step, batch_size, num_directions(=1) * n_hidden]\n",
" # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
" outputs = outputs[-1] # [batch_size, num_directions(=1) * n_hidden]\n",
" model = self.W(outputs) + self.b # model : [batch_size, n_class]\n",
" return model\n",
"\n",
"if __name__ == '__main__':\n",
" n_step = 2 # number of cells(= number of Step)\n",
" n_hidden = 5 # number of hidden units in one cell\n",
"\n",
" sentences = [\"i like dog\", \"i love coffee\", \"i hate milk\"]\n",
"\n",
" word_list = \" \".join(sentences).split()\n",
" word_list = list(set(word_list))\n",
" word_dict = {w: i for i, w in enumerate(word_list)}\n",
" number_dict = {i: w for i, w in enumerate(word_list)}\n",
" n_class = len(word_dict)\n",
" batch_size = len(sentences)\n",
"\n",
" model = TextRNN()\n",
"\n",
" criterion = nn.CrossEntropyLoss()\n",
" optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
" input_batch, target_batch = make_batch()\n",
" input_batch = torch.FloatTensor(input_batch)\n",
" target_batch = torch.LongTensor(target_batch)\n",
"\n",
" # Training\n",
" for epoch in range(5000):\n",
" optimizer.zero_grad()\n",
"\n",
" # hidden : [num_layers * num_directions, batch, hidden_size]\n",
" hidden = torch.zeros(1, batch_size, n_hidden)\n",
" # input_batch : [batch_size, n_step, n_class]\n",
" output = model(hidden, input_batch)\n",
"\n",
" # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)\n",
" loss = criterion(output, target_batch)\n",
" if (epoch + 1) % 1000 == 0:\n",
" print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
"\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" input = [sen.split()[:2] for sen in sentences]\n",
"\n",
" # Predict\n",
" hidden = torch.zeros(1, batch_size, n_hidden)\n",
" predict = model(hidden, input_batch).data.max(1, keepdim=True)[1]\n",
" print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])"
],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: 3-1.TextRNN/TextRNN.py
================================================
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
def make_batch():
    """Build one-hot inputs and integer targets for next-word prediction.

    Reads the module-level globals ``sentences``, ``word_dict`` and ``n_class``.
    For every sentence, all words but the last are the input and the last word
    is the target (a causal language-model setup).

    Returns:
        A pair ``(input_batch, target_batch)``: a list with one float array of
        shape [n_words - 1, n_class] per sentence, and a list of target word ids.
    """
    one_hot = np.eye(n_class)  # row i is the one-hot vector for word id i
    input_batch = []
    target_batch = []

    for sen in sentences:
        words = sen.split()  # space tokenizer
        context_ids = [word_dict[w] for w in words[:-1]]  # words 1..n-1 are the input
        input_batch.append(one_hot[context_ids])  # fancy indexing returns a fresh array
        target_batch.append(word_dict[words[-1]])  # word n is the target

    return input_batch, target_batch
class TextRNN(nn.Module):
    """Single-layer RNN that predicts the next word from the preceding ones.

    Sizes come from the module-level globals ``n_class`` (vocabulary size) and
    ``n_hidden`` (hidden units), read when the model is constructed.
    """

    def __init__(self):
        super().__init__()
        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        self.W = nn.Linear(n_hidden, n_class, bias=False)
        self.b = nn.Parameter(torch.ones([n_class]))

    def forward(self, hidden, X):
        """Return logits [batch_size, n_class] computed from the last time step.

        Args:
            hidden: initial state, [num_layers(=1) * num_directions(=1), batch_size, n_hidden].
            X: one-hot inputs, [batch_size, n_step, n_class].
        """
        # nn.RNN is time-major by default: [n_step, batch_size, n_class]
        time_major = X.transpose(0, 1)
        # step_outputs : [n_step, batch_size, num_directions(=1) * n_hidden]
        step_outputs, _ = self.rnn(time_major, hidden)
        last_step = step_outputs[-1]  # [batch_size, num_directions(=1) * n_hidden]
        return self.W(last_step) + self.b  # [batch_size, n_class]
if __name__ == '__main__':
    # These names are module-level because make_batch() and TextRNN read them as globals.
    n_step = 2  # number of cells(= number of Step)
    n_hidden = 5  # number of hidden units in one cell

    sentences = ["i like dog", "i love coffee", "i hate milk"]

    # Sorted so word ids are identical on every run (set iteration order is not stable across runs).
    word_list = sorted(set(" ".join(sentences).split()))
    word_dict = {w: i for i, w in enumerate(word_list)}
    number_dict = {i: w for i, w in enumerate(word_list)}
    n_class = len(word_dict)
    batch_size = len(sentences)

    model = TextRNN()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    input_batch, target_batch = make_batch()
    # Stack into one ndarray first: building a tensor from a list of arrays is slow and warns.
    input_batch = torch.tensor(np.array(input_batch), dtype=torch.float32)
    target_batch = torch.LongTensor(target_batch)

    # Training
    for epoch in range(5000):
        optimizer.zero_grad()

        # hidden : [num_layers * num_directions, batch, hidden_size]
        hidden = torch.zeros(1, batch_size, n_hidden)
        # input_batch : [batch_size, n_step, n_class]
        output = model(hidden, input_batch)

        # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
        loss = criterion(output, target_batch)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    # Predict (no gradient tracking needed for inference)
    hidden = torch.zeros(1, batch_size, n_hidden)
    with torch.no_grad():
        predict = model(hidden, input_batch).argmax(dim=1)  # [batch_size]
    print([sen.split()[:2] for sen in sentences], '->', [number_dict[i] for i in predict.tolist()])
================================================
FILE: 3-2.TextLSTM/TextLSTM.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"metadata": {},
"source": [
"# code by Tae Hwan Jung @graykode\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"\n",
"def make_batch():\n",
" input_batch, target_batch = [], []\n",
"\n",
" for seq in seq_data:\n",
" input = [word_dict[n] for n in seq[:-1]] # 'm', 'a' , 'k' is input\n",
" target = word_dict[seq[-1]] # 'e' is target\n",
" input_batch.append(np.eye(n_class)[input])\n",
" target_batch.append(target)\n",
"\n",
" return input_batch, target_batch\n",
"\n",
"class TextLSTM(nn.Module):\n",
" def __init__(self):\n",
" super(TextLSTM, self).__init__()\n",
"\n",
" self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)\n",
" self.W = nn.Linear(n_hidden, n_class, bias=False)\n",
" self.b = nn.Parameter(torch.ones([n_class]))\n",
"\n",
" def forward(self, X):\n",
" input = X.transpose(0, 1) # X : [n_step, batch_size, n_class]\n",
"\n",
" hidden_state = torch.zeros(1, len(X), n_hidden) # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
" cell_state = torch.zeros(1, len(X), n_hidden) # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
"\n",
" outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))\n",
" outputs = outputs[-1] # [batch_size, n_hidden]\n",
" model = self.W(outputs) + self.b # model : [batch_size, n_class]\n",
" return model\n",
"\n",
"if __name__ == '__main__':\n",
" n_step = 3 # number of cells(= number of Step)\n",
" n_hidden = 128 # number of hidden units in one cell\n",
"\n",
" char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']\n",
" word_dict = {n: i for i, n in enumerate(char_arr)}\n",
" number_dict = {i: w for i, w in enumerate(char_arr)}\n",
" n_class = len(word_dict) # number of class(=number of vocab)\n",
"\n",
" seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']\n",
"\n",
" model = TextLSTM()\n",
"\n",
" criterion = nn.CrossEntropyLoss()\n",
" optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
" input_batch, target_batch = make_batch()\n",
" input_batch = torch.FloatTensor(input_batch)\n",
" target_batch = torch.LongTensor(target_batch)\n",
"\n",
" # Training\n",
" for epoch in range(1000):\n",
" optimizer.zero_grad()\n",
"\n",
" output = model(input_batch)\n",
" loss = criterion(output, target_batch)\n",
" if (epoch + 1) % 100 == 0:\n",
" print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
"\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" inputs = [sen[:3] for sen in seq_data]\n",
"\n",
" predict = model(input_batch).data.max(1, keepdim=True)[1]\n",
" print(inputs, '->', [number_dict[n.item()] for n in predict.squeeze()])"
],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: 3-2.TextLSTM/TextLSTM.py
================================================
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
def make_batch():
    """Build one-hot inputs and integer targets for next-character prediction.

    Reads the module-level globals ``seq_data``, ``word_dict`` and ``n_class``.
    For every sequence, all characters but the last are the input and the last
    character is the target (e.g. 'm', 'a', 'k' -> 'e' for "make").

    Returns:
        A pair ``(input_batch, target_batch)``: a list with one float array of
        shape [len(seq) - 1, n_class] per sequence, and a list of target ids.
    """
    one_hot = np.eye(n_class)  # row i is the one-hot vector for character id i
    input_batch, target_batch = [], []

    for seq in seq_data:
        context_ids = [word_dict[ch] for ch in seq[:-1]]
        input_batch.append(one_hot[context_ids])  # fancy indexing returns a fresh array
        target_batch.append(word_dict[seq[-1]])

    return input_batch, target_batch
class TextLSTM(nn.Module):
    """Single-layer LSTM that predicts the next character from the preceding ones.

    Sizes come from the module-level globals ``n_class`` (vocabulary size) and
    ``n_hidden`` (hidden units), read when the model is constructed.
    """

    def __init__(self):
        super(TextLSTM, self).__init__()

        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)
        self.W = nn.Linear(n_hidden, n_class, bias=False)
        self.b = nn.Parameter(torch.ones([n_class]))

    def forward(self, X):
        """Return logits [batch_size, n_class] computed from the last time step.

        Args:
            X: one-hot inputs, [batch_size, n_step, n_class].
        """
        lstm_input = X.transpose(0, 1)  # lstm_input : [n_step, batch_size, n_class]

        # Zero initial states created with X's dtype and device, so the model keeps
        # working after .double() / .to(device).
        state_shape = (1, X.size(0), self.lstm.hidden_size)  # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        hidden_state = X.new_zeros(state_shape)
        cell_state = X.new_zeros(state_shape)

        outputs, _ = self.lstm(lstm_input, (hidden_state, cell_state))
        outputs = outputs[-1]  # [batch_size, n_hidden]
        model = self.W(outputs) + self.b  # model : [batch_size, n_class]
        return model
if __name__ == '__main__':
    # These names are module-level because make_batch() and TextLSTM read them as globals.
    n_step = 3  # number of cells(= number of Step)
    n_hidden = 128  # number of hidden units in one cell

    char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']
    word_dict = {n: i for i, n in enumerate(char_arr)}
    number_dict = {i: w for i, w in enumerate(char_arr)}
    n_class = len(word_dict)  # number of class(=number of vocab)

    seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']

    model = TextLSTM()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    input_batch, target_batch = make_batch()
    # Stack into one ndarray first: building a tensor from a list of arrays is slow and warns.
    input_batch = torch.tensor(np.array(input_batch), dtype=torch.float32)
    target_batch = torch.LongTensor(target_batch)

    # Training
    for epoch in range(1000):
        optimizer.zero_grad()

        output = model(input_batch)
        loss = criterion(output, target_batch)
        if (epoch + 1) % 100 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    inputs = [sen[:3] for sen in seq_data]

    # Predict (no gradient tracking needed for inference)
    with torch.no_grad():
        predict = model(input_batch).argmax(dim=1)  # [batch_size]
    print(inputs, '->', [number_dict[i] for i in predict.tolist()])
================================================
FILE: 3-3.Bi-LSTM/Bi-LSTM.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"metadata": {},
"source": [
"# code by Tae Hwan Jung @graykode\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"\n",
"def make_batch():\n",
" input_batch = []\n",
" target_batch = []\n",
"\n",
" words = sentence.split()\n",
" for i, word in enumerate(words[:-1]):\n",
" input = [word_dict[n] for n in words[:(i + 1)]]\n",
" input = input + [0] * (max_len - len(input))\n",
" target = word_dict[words[i + 1]]\n",
" input_batch.append(np.eye(n_class)[input])\n",
" target_batch.append(target)\n",
"\n",
" return input_batch, target_batch\n",
"\n",
"class BiLSTM(nn.Module):\n",
" def __init__(self):\n",
" super(BiLSTM, self).__init__()\n",
"\n",
" self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)\n",
" self.W = nn.Linear(n_hidden * 2, n_class, bias=False)\n",
" self.b = nn.Parameter(torch.ones([n_class]))\n",
"\n",
" def forward(self, X):\n",
" input = X.transpose(0, 1) # input : [n_step, batch_size, n_class]\n",
"\n",
" hidden_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n",
" cell_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n",
"\n",
" outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))\n",
" outputs = outputs[-1] # [batch_size, n_hidden]\n",
" model = self.W(outputs) + self.b # model : [batch_size, n_class]\n",
" return model\n",
"\n",
"if __name__ == '__main__':\n",
" n_hidden = 5 # number of hidden units in one cell\n",
"\n",
" sentence = (\n",
" 'Lorem ipsum dolor sit amet consectetur adipisicing elit '\n",
" 'sed do eiusmod tempor incididunt ut labore et dolore magna '\n",
" 'aliqua Ut enim ad minim veniam quis nostrud exercitation'\n",
" )\n",
"\n",
" word_dict = {w: i for i, w in enumerate(list(set(sentence.split())))}\n",
" number_dict = {i: w for i, w in enumerate(list(set(sentence.split())))}\n",
" n_class = len(word_dict)\n",
" max_len = len(sentence.split())\n",
"\n",
" model = BiLSTM()\n",
"\n",
" criterion = nn.CrossEntropyLoss()\n",
" optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
" input_batch, target_batch = make_batch()\n",
" input_batch = torch.FloatTensor(input_batch)\n",
" target_batch = torch.LongTensor(target_batch)\n",
"\n",
" # Training\n",
" for epoch in range(10000):\n",
" optimizer.zero_grad()\n",
" output = model(input_batch)\n",
" loss = criterion(output, target_batch)\n",
" if (epoch + 1) % 1000 == 0:\n",
" print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
"\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" predict = model(input_batch).data.max(1, keepdim=True)[1]\n",
" print(sentence)\n",
" print([number_dict[n.item()] for n in predict.squeeze()])\n"
],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: 3-3.Bi-LSTM/Bi-LSTM.py
================================================
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
def make_batch():
    """Build padded one-hot prefixes of ``sentence`` and their next-word targets.

    Reads the module-level globals ``sentence``, ``word_dict``, ``n_class`` and
    ``max_len``. For each position i, the input is the first i + 1 words padded
    with id 0 up to ``max_len`` and the target is word i + 1.

    Returns:
        A pair ``(input_batch, target_batch)``: a list of float arrays of shape
        [max_len, n_class] and a list of target word ids.
    """
    one_hot = np.eye(n_class)  # row i is the one-hot vector for word id i
    input_batch = []
    target_batch = []

    words = sentence.split()
    for i in range(len(words) - 1):
        prefix_ids = [word_dict[w] for w in words[:(i + 1)]]
        # NOTE(review): padding uses id 0, which is also a regular word id when
        # word_dict is built with enumerate(); kept as in the original.
        padded_ids = prefix_ids + [0] * (max_len - len(prefix_ids))
        input_batch.append(one_hot[padded_ids])  # fancy indexing returns a fresh array
        target_batch.append(word_dict[words[i + 1]])

    return input_batch, target_batch
class BiLSTM(nn.Module):
    """Single-layer bidirectional LSTM that predicts the next word of a padded prefix.

    Sizes come from the module-level globals ``n_class`` (vocabulary size) and
    ``n_hidden`` (hidden units per direction), read when the model is constructed.
    """

    def __init__(self):
        super(BiLSTM, self).__init__()

        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)
        self.W = nn.Linear(n_hidden * 2, n_class, bias=False)
        self.b = nn.Parameter(torch.ones([n_class]))

    def forward(self, X):
        """Return logits [batch_size, n_class] computed from the last time step.

        Args:
            X: one-hot inputs, [batch_size, n_step, n_class].
        """
        lstm_input = X.transpose(0, 1)  # lstm_input : [n_step, batch_size, n_class]

        # Zero initial states created with X's dtype and device, so the model keeps
        # working after .double() / .to(device).
        state_shape = (1 * 2, X.size(0), self.lstm.hidden_size)  # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        hidden_state = X.new_zeros(state_shape)
        cell_state = X.new_zeros(state_shape)

        outputs, _ = self.lstm(lstm_input, (hidden_state, cell_state))
        # Last time step; both directions are concatenated on the feature axis.
        outputs = outputs[-1]  # [batch_size, num_directions(=2) * n_hidden]
        model = self.W(outputs) + self.b  # model : [batch_size, n_class]
        return model
if __name__ == '__main__':
    # These names are module-level because make_batch() and BiLSTM read them as globals.
    n_hidden = 5  # number of hidden units in one cell

    sentence = (
        'Lorem ipsum dolor sit amet consectetur adipisicing elit '
        'sed do eiusmod tempor incididunt ut labore et dolore magna '
        'aliqua Ut enim ad minim veniam quis nostrud exercitation'
    )

    # Build the vocabulary once so word_dict and number_dict are guaranteed to be
    # inverses of each other; sorted so ids are identical on every run.
    vocab = sorted(set(sentence.split()))
    word_dict = {w: i for i, w in enumerate(vocab)}
    number_dict = {i: w for i, w in enumerate(vocab)}
    n_class = len(word_dict)
    max_len = len(sentence.split())

    model = BiLSTM()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    input_batch, target_batch = make_batch()
    # Stack into one ndarray first: building a tensor from a list of arrays is slow and warns.
    input_batch = torch.tensor(np.array(input_batch), dtype=torch.float32)
    target_batch = torch.LongTensor(target_batch)

    # Training
    for epoch in range(10000):
        optimizer.zero_grad()
        output = model(input_batch)
        loss = criterion(output, target_batch)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    # Predict (no gradient tracking needed for inference)
    with torch.no_grad():
        predict = model(input_batch).argmax(dim=1)  # [batch_size]
    print(sentence)
    print([number_dict[i] for i in predict.tolist()])
================================================
FILE: 4-1.Seq2Seq/Seq2Seq.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"metadata": {},
"source": [
"# code by Tae Hwan Jung @graykode\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"\n",
"# S: Symbol that shows starting of decoding input\n",
"# E: Symbol that shows starting of decoding output\n",
"# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\n",
"\n",
"def make_batch():\n",
" input_batch, output_batch, target_batch = [], [], []\n",
"\n",
" for seq in seq_data:\n",
" for i in range(2):\n",
" seq[i] = seq[i] + 'P' * (n_step - len(seq[i]))\n",
"\n",
" input = [num_dic[n] for n in seq[0]]\n",
" output = [num_dic[n] for n in ('S' + seq[1])]\n",
" target = [num_dic[n] for n in (seq[1] + 'E')]\n",
"\n",
" input_batch.append(np.eye(n_class)[input])\n",
" output_batch.append(np.eye(n_class)[output])\n",
" target_batch.append(target) # not one-hot\n",
"\n",
" # make tensor\n",
" return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch)\n",
"\n",
"# make test batch\n",
"def make_testbatch(input_word):\n",
" input_batch, output_batch = [], []\n",
"\n",
" input_w = input_word + 'P' * (n_step - len(input_word))\n",
" input = [num_dic[n] for n in input_w]\n",
" output = [num_dic[n] for n in 'S' + 'P' * n_step]\n",
"\n",
" input_batch = np.eye(n_class)[input]\n",
" output_batch = np.eye(n_class)[output]\n",
"\n",
" return torch.FloatTensor(input_batch).unsqueeze(0), torch.FloatTensor(output_batch).unsqueeze(0)\n",
"\n",
"# Model\n",
"class Seq2Seq(nn.Module):\n",
" def __init__(self):\n",
" super(Seq2Seq, self).__init__()\n",
"\n",
" self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n",
" self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n",
" self.fc = nn.Linear(n_hidden, n_class)\n",
"\n",
" def forward(self, enc_input, enc_hidden, dec_input):\n",
" enc_input = enc_input.transpose(0, 1) # enc_input: [max_len(=n_step, time step), batch_size, n_class]\n",
" dec_input = dec_input.transpose(0, 1) # dec_input: [max_len(=n_step, time step), batch_size, n_class]\n",
"\n",
" # enc_states : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
" _, enc_states = self.enc_cell(enc_input, enc_hidden)\n",
" # outputs : [max_len+1(=6), batch_size, num_directions(=1) * n_hidden(=128)]\n",
" outputs, _ = self.dec_cell(dec_input, enc_states)\n",
"\n",
" model = self.fc(outputs) # model : [max_len+1(=6), batch_size, n_class]\n",
" return model\n",
"\n",
"if __name__ == '__main__':\n",
" n_step = 5\n",
" n_hidden = 128\n",
"\n",
" char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']\n",
" num_dic = {n: i for i, n in enumerate(char_arr)}\n",
" seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]\n",
"\n",
" n_class = len(num_dic)\n",
" batch_size = len(seq_data)\n",
"\n",
" model = Seq2Seq()\n",
"\n",
" criterion = nn.CrossEntropyLoss()\n",
" optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
" input_batch, output_batch, target_batch = make_batch()\n",
"\n",
" for epoch in range(5000):\n",
" # make hidden shape [num_layers * num_directions, batch_size, n_hidden]\n",
" hidden = torch.zeros(1, batch_size, n_hidden)\n",
"\n",
" optimizer.zero_grad()\n",
" # input_batch : [batch_size, max_len(=n_step, time step), n_class]\n",
" # output_batch : [batch_size, max_len+1(=n_step, time step) (becase of 'S' or 'E'), n_class]\n",
" # target_batch : [batch_size, max_len+1(=n_step, time step)], not one-hot\n",
" output = model(input_batch, hidden, output_batch)\n",
" # output : [max_len+1, batch_size, n_class]\n",
" output = output.transpose(0, 1) # [batch_size, max_len+1(=6), n_class]\n",
" loss = 0\n",
" for i in range(0, len(target_batch)):\n",
" # output[i] : [max_len+1, n_class, target_batch[i] : max_len+1]\n",
" loss += criterion(output[i], target_batch[i])\n",
" if (epoch + 1) % 1000 == 0:\n",
" print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" # Test\n",
" def translate(word):\n",
" input_batch, output_batch = make_testbatch(word)\n",
"\n",
" # make hidden shape [num_layers * num_directions, batch_size, n_hidden]\n",
" hidden = torch.zeros(1, 1, n_hidden)\n",
" output = model(input_batch, hidden, output_batch)\n",
" # output : [max_len+1(=6), batch_size(=1), n_class]\n",
"\n",
" predict = output.data.max(2, keepdim=True)[1] # select n_class dimension\n",
" decoded = [char_arr[i] for i in predict]\n",
" end = decoded.index('E')\n",
" translated = ''.join(decoded[:end])\n",
"\n",
" return translated.replace('P', '')\n",
"\n",
" print('test')\n",
" print('man ->', translate('man'))\n",
" print('mans ->', translate('mans'))\n",
" print('king ->', translate('king'))\n",
" print('black ->', translate('black'))\n",
" print('upp ->', translate('upp'))"
],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: 4-1.Seq2Seq/Seq2Seq.py
================================================
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
# S: Symbol that marks the start of the decoder input
# E: Symbol that marks the end of the decoder output (target)
# P: Padding symbol that fills a sequence shorter than the number of time steps
def make_batch():
    """Build the encoder input, decoder input and target tensors for training.

    Reads the module-level globals ``seq_data`` (pairs of source/target words),
    ``n_step``, ``num_dic`` and ``n_class``. Both words of a pair are padded
    with 'P' up to ``n_step`` characters; the decoder input is 'S' + target and
    the training target is target + 'E'. ``seq_data`` itself is not modified.
    Words longer than ``n_step`` are not truncated.

    Returns:
        ``(input_batch, output_batch, target_batch)``: one-hot float tensors
        [batch_size, n_step, n_class] and [batch_size, n_step + 1, n_class],
        and a LongTensor of ids [batch_size, n_step + 1] (not one-hot).
    """
    one_hot = np.eye(n_class)  # row i is the one-hot vector for character id i
    input_batch, output_batch, target_batch = [], [], []

    for seq in seq_data:
        # Pad into locals instead of writing back into the shared seq_data lists.
        source = seq[0] + 'P' * (n_step - len(seq[0]))
        translation = seq[1] + 'P' * (n_step - len(seq[1]))

        enc_ids = [num_dic[ch] for ch in source]
        dec_ids = [num_dic[ch] for ch in ('S' + translation)]
        target_ids = [num_dic[ch] for ch in (translation + 'E')]

        input_batch.append(one_hot[enc_ids])
        output_batch.append(one_hot[dec_ids])
        target_batch.append(target_ids)  # not one-hot

    # make tensor (stack into a single ndarray first; a list of arrays is slow to convert)
    return (
        torch.tensor(np.array(input_batch), dtype=torch.float32),
        torch.tensor(np.array(output_batch), dtype=torch.float32),
        torch.LongTensor(target_batch),
    )
# make test batch
def make_testbatch(input_word):
    """Build a batch of size one for translating ``input_word``.

    Reads the module-level globals ``n_step``, ``num_dic`` and ``n_class``.
    The word is padded with 'P' up to ``n_step`` characters; the decoder input
    is 'S' followed by ``n_step`` padding symbols.

    Returns:
        ``(input_batch, output_batch)``: one-hot float tensors of shape
        [1, n_step, n_class] and [1, n_step + 1, n_class].
    """
    one_hot = np.eye(n_class)  # row i is the one-hot vector for character id i

    padded_word = input_word + 'P' * (n_step - len(input_word))
    enc_ids = [num_dic[ch] for ch in padded_word]
    dec_ids = [num_dic[ch] for ch in 'S' + 'P' * n_step]

    input_batch = one_hot[enc_ids]
    output_batch = one_hot[dec_ids]

    # unsqueeze(0) adds the batch dimension
    return torch.FloatTensor(input_batch).unsqueeze(0), torch.FloatTensor(output_batch).unsqueeze(0)
# Model
class Seq2Seq(nn.Module):
    """Character-level encoder-decoder built from two single-layer RNNs.

    Sizes come from the module-level globals ``n_class`` (vocabulary size) and
    ``n_hidden`` (hidden units), read when the model is constructed.
    """

    def __init__(self):
        super(Seq2Seq, self).__init__()

        # No ``dropout`` argument: nn.RNN applies dropout only between stacked
        # layers, so on a single-layer RNN it has no effect and only emits a warning.
        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        self.fc = nn.Linear(n_hidden, n_class)

    def forward(self, enc_input, enc_hidden, dec_input):
        """Return per-step logits of shape [dec_len, batch_size, n_class].

        Args:
            enc_input: one-hot encoder input, [batch_size, enc_len, n_class].
            enc_hidden: initial encoder state, [num_layers(=1) * num_directions(=1), batch_size, n_hidden].
            dec_input: one-hot decoder input, [batch_size, dec_len, n_class].
        """
        enc_input = enc_input.transpose(0, 1)  # enc_input: [enc_len, batch_size, n_class]
        dec_input = dec_input.transpose(0, 1)  # dec_input: [dec_len, batch_size, n_class]

        # enc_states : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        _, enc_states = self.enc_cell(enc_input, enc_hidden)
        # The final encoder state initialises the decoder.
        # outputs : [dec_len, batch_size, num_directions(=1) * n_hidden]
        outputs, _ = self.dec_cell(dec_input, enc_states)

        model = self.fc(outputs)  # model : [dec_len, batch_size, n_class]
        return model
if __name__ == '__main__':
    # These names are module-level because make_batch(), make_testbatch() and
    # Seq2Seq read them as globals.
    n_step = 5
    n_hidden = 128

    char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']
    num_dic = {n: i for i, n in enumerate(char_arr)}
    seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]

    n_class = len(num_dic)
    batch_size = len(seq_data)

    model = Seq2Seq()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    input_batch, output_batch, target_batch = make_batch()

    for epoch in range(5000):
        # make hidden shape [num_layers * num_directions, batch_size, n_hidden]
        hidden = torch.zeros(1, batch_size, n_hidden)

        optimizer.zero_grad()
        # input_batch : [batch_size, max_len(=n_step, time step), n_class]
        # output_batch : [batch_size, max_len+1 (because of 'S'), n_class]
        # target_batch : [batch_size, max_len+1 (because of 'E')], not one-hot
        output = model(input_batch, hidden, output_batch)
        # output : [max_len+1, batch_size, n_class]
        output = output.transpose(0, 1)  # [batch_size, max_len+1(=6), n_class]
        loss = 0
        for sample_output, sample_target in zip(output, target_batch):
            # sample_output : [max_len+1, n_class], sample_target : [max_len+1]
            loss += criterion(sample_output, sample_target)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))
        loss.backward()
        optimizer.step()

    # Test
    def translate(word):
        """Greedy-decode ``word`` and return the text predicted before the first 'E'."""
        input_batch, output_batch = make_testbatch(word)

        # make hidden shape [num_layers * num_directions, batch_size, n_hidden]
        hidden = torch.zeros(1, 1, n_hidden)
        with torch.no_grad():  # inference only
            output = model(input_batch, hidden, output_batch)
        # output : [max_len+1(=6), batch_size(=1), n_class]

        predict = output.argmax(dim=2).squeeze(1)  # select n_class dimension -> [max_len+1]
        decoded = [char_arr[i] for i in predict.tolist()]
        # Cut at the end symbol when present; the model is not guaranteed to emit
        # 'E', so fall back to the whole prediction instead of raising ValueError.
        if 'E' in decoded:
            decoded = decoded[:decoded.index('E')]
        translated = ''.join(decoded)

        return translated.replace('P', '')

    print('test')
    print('man ->', translate('man'))
    print('mans ->', translate('mans'))
    print('king ->', translate('king'))
    print('black ->', translate('black'))
    print('upp ->', translate('upp'))
================================================
FILE: 4-2.Seq2Seq(Attention)/Seq2Seq(Attention).ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"metadata": {},
"source": [
"# code by Tae Hwan Jung @graykode\n",
"# Reference : https://github.com/hunkim/PyTorchZeroToAll/blob/master/14_2_seq2seq_att.py\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# S: Symbol that shows starting of decoding input\n",
"# E: Symbol that shows starting of decoding output\n",
"# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\n",
"\n",
"def make_batch():\n",
" input_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[0].split()]]]\n",
" output_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[1].split()]]]\n",
" target_batch = [[word_dict[n] for n in sentences[2].split()]]\n",
"\n",
" # make tensor\n",
" return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch)\n",
"\n",
"class Attention(nn.Module):\n",
" def __init__(self):\n",
" super(Attention, self).__init__()\n",
" self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n",
" self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n",
"\n",
" # Linear for attention\n",
" self.attn = nn.Linear(n_hidden, n_hidden)\n",
" self.out = nn.Linear(n_hidden * 2, n_class)\n",
"\n",
" def forward(self, enc_inputs, hidden, dec_inputs):\n",
" enc_inputs = enc_inputs.transpose(0, 1) # enc_inputs: [n_step(=n_step, time step), batch_size, n_class]\n",
" dec_inputs = dec_inputs.transpose(0, 1) # dec_inputs: [n_step(=n_step, time step), batch_size, n_class]\n",
"\n",
" # enc_outputs : [n_step, batch_size, num_directions(=1) * n_hidden], matrix F\n",
" # enc_hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
" enc_outputs, enc_hidden = self.enc_cell(enc_inputs, hidden)\n",
"\n",
" trained_attn = []\n",
" hidden = enc_hidden\n",
" n_step = len(dec_inputs)\n",
" model = torch.empty([n_step, 1, n_class])\n",
"\n",
" for i in range(n_step): # each time step\n",
" # dec_output : [n_step(=1), batch_size(=1), num_directions(=1) * n_hidden]\n",
" # hidden : [num_layers(=1) * num_directions(=1), batch_size(=1), n_hidden]\n",
" dec_output, hidden = self.dec_cell(dec_inputs[i].unsqueeze(0), hidden)\n",
" attn_weights = self.get_att_weight(dec_output, enc_outputs) # attn_weights : [1, 1, n_step]\n",
" trained_attn.append(attn_weights.squeeze().data.numpy())\n",
"\n",
" # matrix-matrix product of matrices [1,1,n_step] x [1,n_step,n_hidden] = [1,1,n_hidden]\n",
" context = attn_weights.bmm(enc_outputs.transpose(0, 1))\n",
" dec_output = dec_output.squeeze(0) # dec_output : [batch_size(=1), num_directions(=1) * n_hidden]\n",
" context = context.squeeze(1) # [1, num_directions(=1) * n_hidden]\n",
" model[i] = self.out(torch.cat((dec_output, context), 1))\n",
"\n",
" # make model shape [n_step, n_class]\n",
" return model.transpose(0, 1).squeeze(0), trained_attn\n",
"\n",
" def get_att_weight(self, dec_output, enc_outputs): # get attention weight one 'dec_output' with 'enc_outputs'\n",
" n_step = len(enc_outputs)\n",
" attn_scores = torch.zeros(n_step) # attn_scores : [n_step]\n",
"\n",
" for i in range(n_step):\n",
" attn_scores[i] = self.get_att_score(dec_output, enc_outputs[i])\n",
"\n",
" # Normalize scores to weights in range 0 to 1\n",
" return F.softmax(attn_scores).view(1, 1, -1)\n",
"\n",
" def get_att_score(self, dec_output, enc_output): # enc_outputs [batch_size, num_directions(=1) * n_hidden]\n",
" score = self.attn(enc_output) # score : [batch_size, n_hidden]\n",
" return torch.dot(dec_output.view(-1), score.view(-1)) # inner product make scalar value\n",
"\n",
"if __name__ == '__main__':\n",
" n_step = 5 # number of cells(= number of Step)\n",
" n_hidden = 128 # number of hidden units in one cell\n",
"\n",
" sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\n",
"\n",
" word_list = \" \".join(sentences).split()\n",
" word_list = list(set(word_list))\n",
" word_dict = {w: i for i, w in enumerate(word_list)}\n",
" number_dict = {i: w for i, w in enumerate(word_list)}\n",
" n_class = len(word_dict) # vocab list\n",
"\n",
" # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n",
" hidden = torch.zeros(1, 1, n_hidden)\n",
"\n",
" model = Attention()\n",
" criterion = nn.CrossEntropyLoss()\n",
" optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
" input_batch, output_batch, target_batch = make_batch()\n",
"\n",
" # Train\n",
" for epoch in range(2000):\n",
" optimizer.zero_grad()\n",
" output, _ = model(input_batch, hidden, output_batch)\n",
"\n",
" loss = criterion(output, target_batch.squeeze(0))\n",
" if (epoch + 1) % 400 == 0:\n",
" print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
"\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" # Test\n",
" test_batch = [np.eye(n_class)[[word_dict[n] for n in 'SPPPP']]]\n",
" test_batch = torch.FloatTensor(test_batch)\n",
" predict, trained_attn = model(input_batch, hidden, test_batch)\n",
" predict = predict.data.max(1, keepdim=True)[1]\n",
" print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\n",
"\n",
" # Show Attention\n",
" fig = plt.figure(figsize=(5, 5))\n",
" ax = fig.add_subplot(1, 1, 1)\n",
" ax.matshow(trained_attn, cmap='viridis')\n",
" ax.set_xticklabels([''] + sentences[0].split(), fontdict={'fontsize': 14})\n",
" ax.set_yticklabels([''] + sentences[2].split(), fontdict={'fontsize': 14})\n",
" plt.show()"
],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: 4-2.Seq2Seq(Attention)/Seq2Seq(Attention).py
================================================
# %%
# code by Tae Hwan Jung @graykode
# Reference : https://github.com/hunkim/PyTorchZeroToAll/blob/master/14_2_seq2seq_att.py
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
# S: Symbol that shows starting of decoding input
# E: Symbol that marks the end of the decoding output
# P: Padding symbol that fills the blank positions when a sequence is shorter than the number of time steps
def make_batch():
    """Turn the three module-level ``sentences`` into a one-example batch.

    Reads the globals ``sentences``, ``word_dict`` and ``n_class``.

    Returns a tuple (input_batch, output_batch, target_batch):
    the first two are one-hot float tensors built from sentences[0] and
    sentences[1]; the third is a long tensor holding the word indices of
    sentences[2]. Each carries a leading batch dimension of size 1.
    """
    def to_indices(sentence):
        # Map every whitespace-separated token to its vocabulary index.
        return [word_dict[token] for token in sentence.split()]

    identity = np.eye(n_class)  # row i is the one-hot vector of class i

    encoder_rows = [identity[to_indices(sentences[0])]]
    decoder_rows = [identity[to_indices(sentences[1])]]
    target_rows = [to_indices(sentences[2])]

    # make tensor
    return torch.FloatTensor(encoder_rows), torch.FloatTensor(decoder_rows), torch.LongTensor(target_rows)
class Attention(nn.Module):
    """RNN encoder-decoder that attends over the encoder outputs at every decoder step.

    Layer sizes come from the module-level ``n_class`` and ``n_hidden``.
    The forward pass is written for a batch size of 1: the output buffer has a
    fixed batch dimension of 1 and attention scores are computed with
    ``torch.dot`` on flattened vectors.
    """

    def __init__(self):
        super(Attention, self).__init__()
        # Single-layer RNNs. dropout=0.5 was passed here before, but nn.RNN applies
        # dropout only between stacked layers, so with one layer it did nothing
        # except emit a warning at construction time.
        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden)

        # Linear for attention
        self.attn = nn.Linear(n_hidden, n_hidden)
        self.out = nn.Linear(n_hidden * 2, n_class)

    def forward(self, enc_inputs, hidden, dec_inputs):
        """Decode step by step, attending over all encoder outputs.

        enc_inputs : [batch_size(=1), n_step, n_class]
        hidden     : [num_layers(=1) * num_directions(=1), batch_size(=1), n_hidden]
        dec_inputs : [batch_size(=1), n_step, n_class]

        Returns (scores, trained_attn): ``scores`` is [n_step, n_class] and
        ``trained_attn`` is a list with one numpy array of attention weights
        per decoder step.
        """
        enc_inputs = enc_inputs.transpose(0, 1)  # enc_inputs: [n_step, batch_size, n_class]
        dec_inputs = dec_inputs.transpose(0, 1)  # dec_inputs: [n_step, batch_size, n_class]

        # enc_outputs : [n_step, batch_size, num_directions(=1) * n_hidden], matrix F
        # enc_hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        enc_outputs, enc_hidden = self.enc_cell(enc_inputs, hidden)
        # Batch-first view of the encoder outputs; identical for every step, so computed once.
        enc_outputs_bf = enc_outputs.transpose(0, 1)  # [batch_size(=1), n_step, n_hidden]

        trained_attn = []
        hidden = enc_hidden
        n_step = len(dec_inputs)
        model = torch.empty([n_step, 1, n_class])

        for i in range(n_step):  # each time step
            # dec_output : [n_step(=1), batch_size(=1), num_directions(=1) * n_hidden]
            # hidden : [num_layers(=1) * num_directions(=1), batch_size(=1), n_hidden]
            dec_output, hidden = self.dec_cell(dec_inputs[i].unsqueeze(0), hidden)
            attn_weights = self.get_att_weight(dec_output, enc_outputs)  # attn_weights : [1, 1, n_step]
            trained_attn.append(attn_weights.squeeze().data.numpy())

            # matrix-matrix product of matrices [1,1,n_step] x [1,n_step,n_hidden] = [1,1,n_hidden]
            context = attn_weights.bmm(enc_outputs_bf)
            dec_output = dec_output.squeeze(0)  # dec_output : [batch_size(=1), num_directions(=1) * n_hidden]
            context = context.squeeze(1)  # [1, num_directions(=1) * n_hidden]
            model[i] = self.out(torch.cat((dec_output, context), 1))

        # make model shape [n_step, n_class]
        return model.transpose(0, 1).squeeze(0), trained_attn

    def get_att_weight(self, dec_output, enc_outputs):
        """Return softmax-normalised attention weights of one ``dec_output`` over ``enc_outputs``.

        The result has shape [1, 1, n_step], where n_step is the number of
        encoder outputs.
        """
        n_step = len(enc_outputs)
        attn_scores = torch.zeros(n_step)  # attn_scores : [n_step]

        for i in range(n_step):
            attn_scores[i] = self.get_att_score(dec_output, enc_outputs[i])

        # Normalize scores to weights in range 0 to 1. The dimension is given
        # explicitly: relying on softmax's implicit dimension is deprecated.
        return F.softmax(attn_scores, dim=0).view(1, 1, -1)

    def get_att_score(self, dec_output, enc_output):
        """Score one encoder output against the decoder output; returns a scalar tensor.

        enc_output : [batch_size, num_directions(=1) * n_hidden]
        """
        score = self.attn(enc_output)  # score : [batch_size, n_hidden]
        return torch.dot(dec_output.view(-1), score.view(-1))  # inner product make scalar value
if __name__ == '__main__':
    n_step = 5  # number of cells(= number of Step)
    n_hidden = 128  # number of hidden units in one cell

    # [encoder input, decoder input, decoder target]
    sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']

    # sorted() keeps the word -> index assignment reproducible; the iteration
    # order of a set of strings can differ from one interpreter run to the next.
    word_list = sorted(set(" ".join(sentences).split()))
    word_dict = {w: i for i, w in enumerate(word_list)}
    number_dict = {i: w for i, w in enumerate(word_list)}
    n_class = len(word_dict)  # vocab list

    # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
    hidden = torch.zeros(1, 1, n_hidden)

    model = Attention()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    input_batch, output_batch, target_batch = make_batch()

    # Train
    for epoch in range(2000):
        optimizer.zero_grad()
        output, _ = model(input_batch, hidden, output_batch)

        loss = criterion(output, target_batch.squeeze(0))
        if (epoch + 1) % 400 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

        loss.backward()
        optimizer.step()

    # Test: the decoder is fed 'S' followed by padding symbols instead of the reference words
    test_batch = [np.eye(n_class)[[word_dict[n] for n in 'SPPPP']]]
    test_batch = torch.FloatTensor(test_batch)
    predict, trained_attn = model(input_batch, hidden, test_batch)
    predict = predict.data.max(1, keepdim=True)[1]
    print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])

    # Show Attention: one row per decoder step, one column per encoder step
    source_words = sentences[0].split()
    target_words = sentences[2].split()
    fig = plt.figure(figsize=(5, 5))
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(trained_attn, cmap='viridis')
    # Pin the tick positions before labelling them, so each label sits on its
    # own row/column regardless of which ticks the default locator would pick.
    ax.set_xticks(range(len(source_words)))
    ax.set_yticks(range(len(target_words)))
    ax.set_xticklabels(source_words, fontdict={'fontsize': 14})
    ax.set_yticklabels(target_words, fontdict={'fontsize': 14})
    plt.show()
================================================
FILE: 4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"metadata": {},
"source": [
"# code by Tae Hwan Jung(Jeff Jung) @graykode\n",
"# Reference : https://github.com/prakashpandey9/Text-Classification-Pytorch/blob/master/models/LSTM_Attn.py\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"import torch.nn.functional as F\n",
"import matplotlib.pyplot as plt\n",
"\n",
"class BiLSTM_Attention(nn.Module):\n",
" def __init__(self):\n",
" super(BiLSTM_Attention, self).__init__()\n",
"\n",
" self.embedding = nn.Embedding(vocab_size, embedding_dim)\n",
" self.lstm = nn.LSTM(embedding_dim, n_hidden, bidirectional=True)\n",
" self.out = nn.Linear(n_hidden * 2, num_classes)\n",
"\n",
" # lstm_output : [batch_size, n_step, n_hidden * num_directions(=2)], F matrix\n",
" def attention_net(self, lstm_output, final_state):\n",
" hidden = final_state.view(-1, n_hidden * 2, 1) # hidden : [batch_size, n_hidden * num_directions(=2), 1(=n_layer)]\n",
" attn_weights = torch.bmm(lstm_output, hidden).squeeze(2) # attn_weights : [batch_size, n_step]\n",
" soft_attn_weights = F.softmax(attn_weights, 1)\n",
" # [batch_size, n_hidden * num_directions(=2), n_step] * [batch_size, n_step, 1] = [batch_size, n_hidden * num_directions(=2), 1]\n",
" context = torch.bmm(lstm_output.transpose(1, 2), soft_attn_weights.unsqueeze(2)).squeeze(2)\n",
" return context, soft_attn_weights.data.numpy() # context : [batch_size, n_hidden * num_directions(=2)]\n",
"\n",
" def forward(self, X):\n",
" input = self.embedding(X) # input : [batch_size, len_seq, embedding_dim]\n",
" input = input.permute(1, 0, 2) # input : [len_seq, batch_size, embedding_dim]\n",
"\n",
" hidden_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n",
" cell_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n",
"\n",
" # final_hidden_state, final_cell_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n",
" output, (final_hidden_state, final_cell_state) = self.lstm(input, (hidden_state, cell_state))\n",
" output = output.permute(1, 0, 2) # output : [batch_size, len_seq, n_hidden]\n",
" attn_output, attention = self.attention_net(output, final_hidden_state)\n",
" return self.out(attn_output), attention # model : [batch_size, num_classes], attention : [batch_size, n_step]\n",
"\n",
"if __name__ == '__main__':\n",
" embedding_dim = 2 # embedding size\n",
" n_hidden = 5 # number of hidden units in one cell\n",
" num_classes = 2 # 0 or 1\n",
"\n",
" # 3 words sentences (=sequence_length is 3)\n",
" sentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \"i hate you\", \"sorry for that\", \"this is awful\"]\n",
" labels = [1, 1, 1, 0, 0, 0] # 1 is good, 0 is not good.\n",
"\n",
" word_list = \" \".join(sentences).split()\n",
" word_list = list(set(word_list))\n",
" word_dict = {w: i for i, w in enumerate(word_list)}\n",
" vocab_size = len(word_dict)\n",
"\n",
" model = BiLSTM_Attention()\n",
"\n",
" criterion = nn.CrossEntropyLoss()\n",
" optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
" inputs = torch.LongTensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences])\n",
" targets = torch.LongTensor([out for out in labels]) # To using Torch Softmax Loss function\n",
"\n",
" # Training\n",
" for epoch in range(5000):\n",
" optimizer.zero_grad()\n",
" output, attention = model(inputs)\n",
" loss = criterion(output, targets)\n",
" if (epoch + 1) % 1000 == 0:\n",
" print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
"\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" # Test\n",
" test_text = 'sorry hate you'\n",
" tests = [np.asarray([word_dict[n] for n in test_text.split()])]\n",
" test_batch = torch.LongTensor(tests)\n",
"\n",
" # Predict\n",
" predict, _ = model(test_batch)\n",
" predict = predict.data.max(1, keepdim=True)[1]\n",
" if predict[0][0] == 0:\n",
" print(test_text,\"is Bad Mean...\")\n",
" else:\n",
" print(test_text,\"is Good Mean!!\")\n",
"\n",
" fig = plt.figure(figsize=(6, 3)) # [batch_size, n_step]\n",
" ax = fig.add_subplot(1, 1, 1)\n",
" ax.matshow(attention, cmap='viridis')\n",
" ax.set_xticklabels(['']+['first_word', 'second_word', 'third_word'], fontdict={'fontsize': 14}, rotation=90)\n",
" ax.set_yticklabels(['']+['batch_1', 'batch_2', 'batch_3', 'batch_4', 'batch_5', 'batch_6'], fontdict={'fontsize': 14})\n",
" plt.show()"
],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: 4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).py
================================================
# %%
# code by Tae Hwan Jung(Jeff Jung) @graykode
# Reference : https://github.com/prakashpandey9/Text-Classification-Pytorch/blob/master/models/LSTM_Attn.py
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
class BiLSTM_Attention(nn.Module):
    """Bidirectional LSTM classifier that attends over its own outputs.

    The final hidden state (forward and backward direction concatenated) is
    used as the query against every time step of the LSTM output. Sizes come
    from the module-level ``vocab_size``, ``embedding_dim``, ``n_hidden`` and
    ``num_classes``.
    """

    def __init__(self):
        super(BiLSTM_Attention, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, n_hidden, bidirectional=True)
        self.out = nn.Linear(n_hidden * 2, num_classes)

    def attention_net(self, lstm_output, final_state):
        """Compute the attention-weighted context vector for every sample.

        lstm_output : [batch_size, n_step, n_hidden * num_directions(=2)], F matrix
        final_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]

        Returns (context, weights): ``context`` is a tensor
        [batch_size, n_hidden * num_directions(=2)] and ``weights`` is a numpy
        array [batch_size, n_step].
        """
        # Bring the batch axis to the front BEFORE flattening the direction axis.
        # Viewing the [2, batch_size, n_hidden] tensor directly as
        # [batch_size, 2 * n_hidden, 1] would pair up states belonging to
        # different samples whenever batch_size > 1.
        hidden = final_state.transpose(0, 1).reshape(-1, n_hidden * 2, 1)  # hidden : [batch_size, n_hidden * num_directions(=2), 1(=n_layer)]
        attn_weights = torch.bmm(lstm_output, hidden).squeeze(2)  # attn_weights : [batch_size, n_step]
        soft_attn_weights = F.softmax(attn_weights, 1)
        # [batch_size, n_hidden * num_directions(=2), n_step] * [batch_size, n_step, 1] = [batch_size, n_hidden * num_directions(=2), 1]
        context = torch.bmm(lstm_output.transpose(1, 2), soft_attn_weights.unsqueeze(2)).squeeze(2)
        return context, soft_attn_weights.data.numpy()  # context : [batch_size, n_hidden * num_directions(=2)]

    def forward(self, X):
        """Classify a batch of index sequences.

        X : [batch_size, len_seq] long tensor of word indices.

        Returns (logits, attention): ``logits`` is [batch_size, num_classes]
        and ``attention`` is a numpy array [batch_size, n_step].
        """
        embedded = self.embedding(X)  # embedded : [batch_size, len_seq, embedding_dim]
        embedded = embedded.permute(1, 0, 2)  # embedded : [len_seq, batch_size, embedding_dim]

        hidden_state = torch.zeros(1*2, len(X), n_hidden)  # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        cell_state = torch.zeros(1*2, len(X), n_hidden)  # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]

        # final_hidden_state, final_cell_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        output, (final_hidden_state, final_cell_state) = self.lstm(embedded, (hidden_state, cell_state))
        output = output.permute(1, 0, 2)  # output : [batch_size, len_seq, n_hidden * num_directions(=2)]
        attn_output, attention = self.attention_net(output, final_hidden_state)
        return self.out(attn_output), attention  # model : [batch_size, num_classes], attention : [batch_size, n_step]
if __name__ == '__main__':
    embedding_dim = 2  # embedding size
    n_hidden = 5  # number of hidden units in one cell
    num_classes = 2  # 0 or 1

    # 3 words sentences (=sequence_length is 3)
    sentences = ["i love you", "he loves me", "she likes baseball", "i hate you", "sorry for that", "this is awful"]
    labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.

    # sorted() keeps the word -> index assignment reproducible; the iteration
    # order of a set of strings can differ from one interpreter run to the next.
    word_list = sorted(set(" ".join(sentences).split()))
    word_dict = {w: i for i, w in enumerate(word_list)}
    vocab_size = len(word_dict)

    model = BiLSTM_Attention()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Plain nested lists of indices: [batch_size, sequence_length]
    inputs = torch.LongTensor([[word_dict[n] for n in sen.split()] for sen in sentences])
    targets = torch.LongTensor(labels)  # class indices, as expected by CrossEntropyLoss

    # Training
    for epoch in range(5000):
        optimizer.zero_grad()
        output, attention = model(inputs)
        loss = criterion(output, targets)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

        loss.backward()
        optimizer.step()

    # Test
    test_text = 'sorry hate you'
    test_batch = torch.LongTensor([[word_dict[n] for n in test_text.split()]])

    # Predict
    predict, _ = model(test_batch)
    predict = predict.data.max(1, keepdim=True)[1]
    if predict[0][0] == 0:
        print(test_text,"is Bad Mean...")
    else:
        print(test_text,"is Good Mean!!")

    # Plot the attention weights of the last training step: [batch_size, n_step]
    word_labels = ['first_word', 'second_word', 'third_word']
    row_labels = ['batch_1', 'batch_2', 'batch_3', 'batch_4', 'batch_5', 'batch_6']
    fig = plt.figure(figsize=(6, 3))
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(attention, cmap='viridis')
    # Pin the tick positions before labelling them, so each label sits on its
    # own row/column regardless of which ticks the default locator would pick.
    ax.set_xticks(range(len(word_labels)))
    ax.set_yticks(range(len(row_labels)))
    ax.set_xticklabels(word_labels, fontdict={'fontsize': 14}, rotation=90)
    ax.set_yticklabels(row_labels, fontdict={'fontsize': 14})
    plt.show()
================================================
FILE: 5-1.Transformer/Transformer(Greedy_decoder).ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"metadata": {},
"source": [
"# code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612\n",
"# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\n",
"# https://github.com/JayParks/transformer\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# S: Symbol that shows starting of decoding input\n",
"# E: Symbol that shows starting of decoding output\n",
"# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\n",
"\n",
"def make_batch():\n",
" input_batch = [[src_vocab[n] for n in sentences[0].split()]]\n",
" output_batch = [[tgt_vocab[n] for n in sentences[1].split()]]\n",
" target_batch = [[tgt_vocab[n] for n in sentences[2].split()]]\n",
" return torch.LongTensor(input_batch), torch.LongTensor(output_batch), torch.LongTensor(target_batch)\n",
"\n",
"def get_sinusoid_encoding_table(n_position, d_model):\n",
" def cal_angle(position, hid_idx):\n",
" return position / np.power(10000, 2 * (hid_idx // 2) / d_model)\n",
" def get_posi_angle_vec(position):\n",
" return [cal_angle(position, hid_j) for hid_j in range(d_model)]\n",
"\n",
" sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)])\n",
" sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i\n",
" sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1\n",
" return torch.FloatTensor(sinusoid_table)\n",
"\n",
"def get_attn_pad_mask(seq_q, seq_k):\n",
" # print(seq_q)\n",
" batch_size, len_q = seq_q.size()\n",
" batch_size, len_k = seq_k.size()\n",
" # eq(zero) is PAD token\n",
" pad_attn_mask = seq_k.data.eq(0).unsqueeze(1) # batch_size x 1 x len_k(=len_q), one is masking\n",
" return pad_attn_mask.expand(batch_size, len_q, len_k) # batch_size x len_q x len_k\n",
"\n",
"def get_attn_subsequent_mask(seq):\n",
" attn_shape = [seq.size(0), seq.size(1), seq.size(1)]\n",
" subsequent_mask = np.triu(np.ones(attn_shape), k=1)\n",
" subsequent_mask = torch.from_numpy(subsequent_mask).byte()\n",
" return subsequent_mask\n",
"\n",
"class ScaledDotProductAttention(nn.Module):\n",
" def __init__(self):\n",
" super(ScaledDotProductAttention, self).__init__()\n",
"\n",
" def forward(self, Q, K, V, attn_mask):\n",
" scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n",
" scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\n",
" attn = nn.Softmax(dim=-1)(scores)\n",
" context = torch.matmul(attn, V)\n",
" return context, attn\n",
"\n",
"class MultiHeadAttention(nn.Module):\n",
" def __init__(self):\n",
" super(MultiHeadAttention, self).__init__()\n",
" self.W_Q = nn.Linear(d_model, d_k * n_heads)\n",
" self.W_K = nn.Linear(d_model, d_k * n_heads)\n",
" self.W_V = nn.Linear(d_model, d_v * n_heads)\n",
" self.linear = nn.Linear(n_heads * d_v, d_model)\n",
" self.layer_norm = nn.LayerNorm(d_model)\n",
"\n",
" def forward(self, Q, K, V, attn_mask):\n",
" # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\n",
" residual, batch_size = Q, Q.size(0)\n",
" # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\n",
" q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2) # q_s: [batch_size x n_heads x len_q x d_k]\n",
" k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2) # k_s: [batch_size x n_heads x len_k x d_k]\n",
" v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2) # v_s: [batch_size x n_heads x len_k x d_v]\n",
"\n",
" attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k]\n",
"\n",
" # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n",
" context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)\n",
" context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\n",
" output = self.linear(context)\n",
" return self.layer_norm(output + residual), attn # output: [batch_size x len_q x d_model]\n",
"\n",
"class PoswiseFeedForwardNet(nn.Module):\n",
" def __init__(self):\n",
" super(PoswiseFeedForwardNet, self).__init__()\n",
" self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)\n",
" self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)\n",
" self.layer_norm = nn.LayerNorm(d_model)\n",
"\n",
" def forward(self, inputs):\n",
" residual = inputs # inputs : [batch_size, len_q, d_model]\n",
" output = nn.ReLU()(self.conv1(inputs.transpose(1, 2)))\n",
" output = self.conv2(output).transpose(1, 2)\n",
" return self.layer_norm(output + residual)\n",
"\n",
"class EncoderLayer(nn.Module):\n",
" def __init__(self):\n",
" super(EncoderLayer, self).__init__()\n",
" self.enc_self_attn = MultiHeadAttention()\n",
" self.pos_ffn = PoswiseFeedForwardNet()\n",
"\n",
" def forward(self, enc_inputs, enc_self_attn_mask):\n",
" enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\n",
" enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\n",
" return enc_outputs, attn\n",
"\n",
"class DecoderLayer(nn.Module):\n",
" def __init__(self):\n",
" super(DecoderLayer, self).__init__()\n",
" self.dec_self_attn = MultiHeadAttention()\n",
" self.dec_enc_attn = MultiHeadAttention()\n",
" self.pos_ffn = PoswiseFeedForwardNet()\n",
"\n",
" def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):\n",
" dec_outputs, dec_self_attn = self.dec_self_attn(dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask)\n",
" dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)\n",
" dec_outputs = self.pos_ffn(dec_outputs)\n",
" return dec_outputs, dec_self_attn, dec_enc_attn\n",
"\n",
"class Encoder(nn.Module):\n",
" def __init__(self):\n",
" super(Encoder, self).__init__()\n",
" self.src_emb = nn.Embedding(src_vocab_size, d_model)\n",
" self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model),freeze=True)\n",
" self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\n",
"\n",
" def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]\n",
" enc_outputs = self.src_emb(enc_inputs) + self.pos_emb(torch.LongTensor([[1,2,3,4,0]]))\n",
" enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)\n",
" enc_self_attns = []\n",
" for layer in self.layers:\n",
" enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)\n",
" enc_self_attns.append(enc_self_attn)\n",
" return enc_outputs, enc_self_attns\n",
"\n",
"class Decoder(nn.Module):\n",
" def __init__(self):\n",
" super(Decoder, self).__init__()\n",
" self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)\n",
" self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model),freeze=True)\n",
" self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])\n",
"\n",
" def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs : [batch_size x target_len]\n",
" dec_outputs = self.tgt_emb(dec_inputs) + self.pos_emb(torch.LongTensor([[5,1,2,3,4]]))\n",
" dec_self_attn_pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs)\n",
" dec_self_attn_subsequent_mask = get_attn_subsequent_mask(dec_inputs)\n",
" dec_self_attn_mask = torch.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)\n",
"\n",
" dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs)\n",
"\n",
" dec_self_attns, dec_enc_attns = [], []\n",
" for layer in self.layers:\n",
" dec_outputs, dec_self_attn, dec_enc_attn = layer(dec_outputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask)\n",
" dec_self_attns.append(dec_self_attn)\n",
" dec_enc_attns.append(dec_enc_attn)\n",
" return dec_outputs, dec_self_attns, dec_enc_attns\n",
"\n",
"class Transformer(nn.Module):\n",
" def __init__(self):\n",
" super(Transformer, self).__init__()\n",
" self.encoder = Encoder()\n",
" self.decoder = Decoder()\n",
" self.projection = nn.Linear(d_model, tgt_vocab_size, bias=False)\n",
" def forward(self, enc_inputs, dec_inputs):\n",
" enc_outputs, enc_self_attns = self.encoder(enc_inputs)\n",
" dec_outputs, dec_self_attns, dec_enc_attns = self.decoder(dec_inputs, enc_inputs, enc_outputs)\n",
" dec_logits = self.projection(dec_outputs) # dec_logits : [batch_size x src_vocab_size x tgt_vocab_size]\n",
" return dec_logits.view(-1, dec_logits.size(-1)), enc_self_attns, dec_self_attns, dec_enc_attns\n",
"\n",
"def greedy_decoder(model, enc_input, start_symbol):\n",
" \"\"\"\n",
" For simplicity, a Greedy Decoder is Beam search when K=1. This is necessary for inference as we don't know the\n",
" target sequence input. Therefore we try to generate the target input word by word, then feed it into the transformer.\n",
" Starting Reference: http://nlp.seas.harvard.edu/2018/04/03/attention.html#greedy-decoding\n",
" :param model: Transformer Model\n",
" :param enc_input: The encoder input\n",
" :param start_symbol: The start symbol. In this example it is 'S' which corresponds to index 4\n",
" :return: The target input\n",
" \"\"\"\n",
" enc_outputs, enc_self_attns = model.encoder(enc_input)\n",
" dec_input = torch.zeros(1, 5).type_as(enc_input.data)\n",
" next_symbol = start_symbol\n",
" for i in range(0, 5):\n",
" dec_input[0][i] = next_symbol\n",
" dec_outputs, _, _ = model.decoder(dec_input, enc_input, enc_outputs)\n",
" projected = model.projection(dec_outputs)\n",
" prob = projected.squeeze(0).max(dim=-1, keepdim=False)[1]\n",
" next_word = prob.data[i]\n",
" next_symbol = next_word.item()\n",
" return dec_input\n",
"\n",
"def showgraph(attn):\n",
" attn = attn[-1].squeeze(0)[0]\n",
" attn = attn.squeeze(0).data.numpy()\n",
" fig = plt.figure(figsize=(n_heads, n_heads)) # [n_heads, n_heads]\n",
" ax = fig.add_subplot(1, 1, 1)\n",
" ax.matshow(attn, cmap='viridis')\n",
" ax.set_xticklabels(['']+sentences[0].split(), fontdict={'fontsize': 14}, rotation=90)\n",
" ax.set_yticklabels(['']+sentences[2].split(), fontdict={'fontsize': 14})\n",
" plt.show()\n",
"\n",
"if __name__ == '__main__':\n",
" sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\n",
" # Transformer Parameters\n",
" # Padding Should be Zero index\n",
" src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4}\n",
" src_vocab_size = len(src_vocab)\n",
"\n",
" tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6}\n",
" number_dict = {i: w for i, w in enumerate(tgt_vocab)}\n",
" tgt_vocab_size = len(tgt_vocab)\n",
"\n",
" src_len = 5 # length of source\n",
" tgt_len = 5 # length of target\n",
"\n",
" d_model = 512 # Embedding Size\n",
" d_ff = 2048 # FeedForward dimension\n",
" d_k = d_v = 64 # dimension of K(=Q), V\n",
" n_layers = 6 # number of Encoder of Decoder Layer\n",
" n_heads = 8 # number of heads in Multi-Head Attention\n",
"\n",
" model = Transformer()\n",
"\n",
" criterion = nn.CrossEntropyLoss()\n",
" optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
" enc_inputs, dec_inputs, target_batch = make_batch()\n",
"\n",
" for epoch in range(20):\n",
" optimizer.zero_grad()\n",
" outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)\n",
" loss = criterion(outputs, target_batch.contiguous().view(-1))\n",
" print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" # Test\n",
" greedy_dec_input = greedy_decoder(model, enc_inputs, start_symbol=tgt_vocab[\"S\"])\n",
" predict, _, _, _ = model(enc_inputs, greedy_dec_input)\n",
" predict = predict.data.max(1, keepdim=True)[1]\n",
" print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\n",
"\n",
" print('first head of last state enc_self_attns')\n",
" showgraph(enc_self_attns)\n",
"\n",
" print('first head of last state dec_self_attns')\n",
" showgraph(dec_self_attns)\n",
"\n",
" print('first head of last state dec_enc_attns')\n",
" showgraph(dec_enc_attns)"
],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: 5-1.Transformer/Transformer(Greedy_decoder).py
================================================
# %%
# code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612
# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch
# https://github.com/JayParks/transformer
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
# S: Symbol that shows starting of decoding input
# E: Symbol that marks the end of the decoding output
# P: Padding symbol used to fill a sequence that is shorter than the number of time steps
def make_batch():
    """Encode the module-level ``sentences`` triple as index tensors.

    Reads the globals ``sentences``, ``src_vocab`` and ``tgt_vocab``.

    Returns:
        A tuple ``(encoder_input, decoder_input, target)`` of ``LongTensor``s.
        Each holds a single row (a batch of one) with the vocabulary indices of
        the whitespace-separated words of ``sentences[0]``, ``sentences[1]``
        and ``sentences[2]`` respectively.
    """
    encoder_ids = [src_vocab[word] for word in sentences[0].split()]
    decoder_ids = [tgt_vocab[word] for word in sentences[1].split()]
    target_ids = [tgt_vocab[word] for word in sentences[2].split()]
    # Wrap each sequence in an outer list so the tensors carry a batch axis.
    return (
        torch.LongTensor([encoder_ids]),
        torch.LongTensor([decoder_ids]),
        torch.LongTensor([target_ids]),
    )
def get_sinusoid_encoding_table(n_position, d_model):
    """Build the fixed sinusoidal positional-encoding table.

    Entry ``(pos, j)`` starts as the angle
    ``pos / 10000 ** (2 * (j // 2) / d_model)``; even columns are then mapped
    through ``sin`` and odd columns through ``cos``.

    Args:
        n_position: number of positions (rows of the table).
        d_model: width of each encoding (columns of the table).

    Returns:
        A ``torch.FloatTensor`` of shape ``[n_position, d_model]``.
    """
    def angle(pos, dim):
        # Columns 2i and 2i+1 share one frequency, hence the ``dim // 2``.
        return pos / np.power(10000, 2 * (dim // 2) / d_model)

    table = np.array([
        [angle(pos, dim) for dim in range(d_model)]
        for pos in range(n_position)
    ])
    table[:, 0::2] = np.sin(table[:, 0::2])  # dim 2i
    table[:, 1::2] = np.cos(table[:, 1::2])  # dim 2i+1
    return torch.FloatTensor(table)
def get_attn_pad_mask(seq_q, seq_k):
    """Build an attention mask that hides PAD keys from every query.

    Args:
        seq_q: 2-D tensor of query token ids, ``[batch_size x len_q]``.
        seq_k: 2-D tensor of key token ids, ``[batch_size x len_k]``.

    Returns:
        A tensor of shape ``[batch_size x len_q x len_k]`` that is true
        wherever the key token id equals 0 (the PAD index). The batch size is
        taken from ``seq_k``.
    """
    _, len_q = seq_q.size()
    batch_size, len_k = seq_k.size()
    key_is_pad = seq_k.data.eq(0)  # token id 0 is the PAD token
    # batch_size x 1 x len_k, broadcast over every query position.
    return key_is_pad.unsqueeze(1).expand(batch_size, len_q, len_k)
def get_attn_subsequent_mask(seq):
    """Build the causal mask that hides later positions from earlier ones.

    Args:
        seq: tensor whose first two dimensions are ``batch_size`` and
            ``seq_len``.

    Returns:
        A ``uint8`` tensor of shape ``[batch_size x seq_len x seq_len]`` with
        ones strictly above the main diagonal and zeros elsewhere.
    """
    batch_size, seq_len = seq.size(0), seq.size(1)
    all_ones = torch.ones(batch_size, seq_len, seq_len)
    # diagonal=1 keeps only entries with column > row, i.e. future positions.
    return all_ones.triu(diagonal=1).byte()
class ScaledDotProductAttention(nn.Module):
    """Scaled dot-product attention: ``softmax(Q K^T / sqrt(d_k)) V``."""

    def __init__(self):
        super(ScaledDotProductAttention, self).__init__()

    def forward(self, Q, K, V, attn_mask):
        """Attend from queries ``Q`` over keys ``K`` and values ``V``.

        Args:
            Q: queries, ``[... x len_q x d_k]``.
            K: keys, ``[... x len_k x d_k]``.
            V: values, ``[... x len_k x d_v]``.
            attn_mask: boolean mask broadcastable to ``[... x len_q x len_k]``;
                true entries are excluded from attention.

        Returns:
            ``(context, attn)`` where ``attn`` holds the attention weights
            ``[... x len_q x len_k]`` and ``context`` is ``attn @ V``.
        """
        # Take the scale from the key width instead of a module-level ``d_k``
        # so the layer also works when this file is imported rather than run.
        d_k = K.size(-1)
        scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k)
        # Masked positions get a large negative score, i.e. ~0 after softmax.
        scores.masked_fill_(attn_mask, -1e9)
        attn = torch.softmax(scores, dim=-1)
        context = torch.matmul(attn, V)
        return context, attn
class MultiHeadAttention(nn.Module):
    """Multi-head attention followed by a residual connection and LayerNorm.

    Layer sizes are read from the module-level ``d_model``, ``d_k``, ``d_v``
    and ``n_heads``.
    """

    def __init__(self):
        super(MultiHeadAttention, self).__init__()
        self.W_Q = nn.Linear(d_model, d_k * n_heads)
        self.W_K = nn.Linear(d_model, d_k * n_heads)
        self.W_V = nn.Linear(d_model, d_v * n_heads)
        self.linear = nn.Linear(n_heads * d_v, d_model)
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, Q, K, V, attn_mask):
        """Run multi-head attention.

        Args:
            Q: ``[batch_size x len_q x d_model]``.
            K: ``[batch_size x len_k x d_model]``.
            V: ``[batch_size x len_k x d_model]``.
            attn_mask: ``[batch_size x len_q x len_k]``; true entries are
                masked out.

        Returns:
            ``(output, attn)`` with ``output`` of shape
            ``[batch_size x len_q x d_model]`` and ``attn`` of shape
            ``[batch_size x n_heads x len_q x len_k]``.
        """
        batch_size = Q.size(0)
        residual = Q

        def split_heads(projected, head_dim):
            # (B, S, H*W) -split-> (B, S, H, W) -trans-> (B, H, S, W)
            return projected.view(batch_size, -1, n_heads, head_dim).transpose(1, 2)

        q_s = split_heads(self.W_Q(Q), d_k)  # [batch_size x n_heads x len_q x d_k]
        k_s = split_heads(self.W_K(K), d_k)  # [batch_size x n_heads x len_k x d_k]
        v_s = split_heads(self.W_V(V), d_v)  # [batch_size x n_heads x len_k x d_v]

        # The same mask is applied to every head.
        per_head_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1)

        context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, per_head_mask)
        # Merge the heads back: [batch_size x len_q x n_heads * d_v]
        merged = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v)
        projected = self.linear(merged)
        return self.layer_norm(projected + residual), attn
class PoswiseFeedForwardNet(nn.Module):
    """Position-wise feed-forward block with residual connection and LayerNorm.

    Built from two kernel-size-1 convolutions (``d_model -> d_ff -> d_model``);
    ``d_model`` and ``d_ff`` are module-level settings.
    """

    def __init__(self):
        super(PoswiseFeedForwardNet, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, inputs):
        """Transform ``inputs`` of shape ``[batch_size x len_q x d_model]``.

        Returns a tensor of the same shape.
        """
        # Conv1d expects channels first, so move d_model to axis 1 and back.
        channels_first = inputs.transpose(1, 2)
        hidden = torch.relu(self.conv1(channels_first))
        transformed = self.conv2(hidden).transpose(1, 2)
        return self.layer_norm(transformed + inputs)
class EncoderLayer(nn.Module):
    """One encoder layer: self-attention followed by the feed-forward block."""

    def __init__(self):
        super(EncoderLayer, self).__init__()
        self.enc_self_attn = MultiHeadAttention()
        self.pos_ffn = PoswiseFeedForwardNet()

    def forward(self, enc_inputs, enc_self_attn_mask):
        """Apply the layer.

        Args:
            enc_inputs: ``[batch_size x len_q x d_model]``.
            enc_self_attn_mask: mask handed to the self-attention sub-layer.

        Returns:
            ``(enc_outputs, attn)``: the layer output and the self-attention
            weights.
        """
        # Self-attention: the same tensor serves as Q, K and V.
        attended, attn = self.enc_self_attn(
            enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask
        )
        return self.pos_ffn(attended), attn
class DecoderLayer(nn.Module):
    """One decoder layer: self-attention, encoder-decoder attention, feed-forward."""

    def __init__(self):
        super(DecoderLayer, self).__init__()
        self.dec_self_attn = MultiHeadAttention()
        self.dec_enc_attn = MultiHeadAttention()
        self.pos_ffn = PoswiseFeedForwardNet()

    def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):
        """Apply the layer.

        Args:
            dec_inputs: decoder-side hidden states.
            enc_outputs: encoder output, used as keys and values of the
                encoder-decoder attention.
            dec_self_attn_mask: mask for the decoder self-attention.
            dec_enc_attn_mask: mask for the encoder-decoder attention.

        Returns:
            ``(dec_outputs, dec_self_attn, dec_enc_attn)``: the layer output
            and the attention weights of both attention sub-layers.
        """
        self_attended, dec_self_attn = self.dec_self_attn(
            dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask
        )
        # Queries come from the decoder; keys and values from the encoder.
        cross_attended, dec_enc_attn = self.dec_enc_attn(
            self_attended, enc_outputs, enc_outputs, dec_enc_attn_mask
        )
        return self.pos_ffn(cross_attended), dec_self_attn, dec_enc_attn
class Encoder(nn.Module):
    """Stack of ``n_layers`` encoder layers over token + positional embeddings.

    Sizes come from the module-level ``src_vocab_size``, ``src_len``,
    ``d_model`` and ``n_layers``.
    """

    def __init__(self):
        super(Encoder, self).__init__()
        self.src_emb = nn.Embedding(src_vocab_size, d_model)
        # Frozen sinusoid table with src_len + 1 rows: row 0 is used for PAD,
        # rows 1..src_len for real positions.
        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len + 1, d_model), freeze=True)
        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])

    def forward(self, enc_inputs):
        """Encode a batch of source sequences.

        Args:
            enc_inputs: token ids, ``[batch_size x source_len]``;
                ``source_len`` must not exceed ``src_len``.

        Returns:
            ``(enc_outputs, enc_self_attns)``: the final hidden states and a
            list with the self-attention weights of every layer.
        """
        # Derive position ids from the input instead of hard-coding them for
        # one sentence: 1-based index for real tokens, 0 for PAD (token id 0).
        # For the example "ich mochte ein bier P" this yields [1, 2, 3, 4, 0].
        seq_len = enc_inputs.size(1)
        positions = torch.arange(1, seq_len + 1, device=enc_inputs.device)
        positions = positions.unsqueeze(0).expand_as(enc_inputs)
        positions = positions.masked_fill(enc_inputs.eq(0), 0)

        enc_outputs = self.src_emb(enc_inputs) + self.pos_emb(positions)
        enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)
        enc_self_attns = []
        for layer in self.layers:
            enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)
            enc_self_attns.append(enc_self_attn)
        return enc_outputs, enc_self_attns
class Decoder(nn.Module):
    """Stack of ``n_layers`` decoder layers over token + positional embeddings.

    Sizes come from the module-level ``tgt_vocab_size``, ``tgt_len``,
    ``d_model`` and ``n_layers``.
    """

    def __init__(self):
        super(Decoder, self).__init__()
        self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)
        # Frozen sinusoid table with tgt_len + 1 rows: row 0 is used for PAD,
        # rows 1..tgt_len for real positions.
        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len + 1, d_model), freeze=True)
        self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])

    def forward(self, dec_inputs, enc_inputs, enc_outputs):
        """Decode against the encoder output.

        Args:
            dec_inputs: target-side token ids, ``[batch_size x target_len]``;
                ``target_len`` must not exceed ``tgt_len``.
            enc_inputs: source token ids, used only to mask PAD keys in the
                encoder-decoder attention.
            enc_outputs: encoder hidden states.

        Returns:
            ``(dec_outputs, dec_self_attns, dec_enc_attns)``: the final hidden
            states and per-layer lists of both kinds of attention weights.
        """
        # Position ids follow sequence order: 1-based index for real tokens,
        # 0 for PAD (token id 0). The previous hard-coded [5, 1, 2, 3, 4] was
        # the token ids of "S i want a beer", which gave the first token
        # position 5.
        seq_len = dec_inputs.size(1)
        positions = torch.arange(1, seq_len + 1, device=dec_inputs.device)
        positions = positions.unsqueeze(0).expand_as(dec_inputs)
        positions = positions.masked_fill(dec_inputs.eq(0), 0)

        dec_outputs = self.tgt_emb(dec_inputs) + self.pos_emb(positions)

        # A key is hidden from self-attention if it is PAD or lies in the future.
        dec_self_attn_pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs)
        dec_self_attn_subsequent_mask = get_attn_subsequent_mask(dec_inputs)
        dec_self_attn_mask = torch.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)

        dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs)

        dec_self_attns, dec_enc_attns = [], []
        for layer in self.layers:
            dec_outputs, dec_self_attn, dec_enc_attn = layer(dec_outputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask)
            dec_self_attns.append(dec_self_attn)
            dec_enc_attns.append(dec_enc_attn)
        return dec_outputs, dec_self_attns, dec_enc_attns
class Transformer(nn.Module):
    """Encoder-decoder model with a bias-free projection onto the target vocabulary."""

    def __init__(self):
        super(Transformer, self).__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()
        self.projection = nn.Linear(d_model, tgt_vocab_size, bias=False)

    def forward(self, enc_inputs, dec_inputs):
        """Run the full model.

        Args:
            enc_inputs: source token ids.
            dec_inputs: decoder-input token ids.

        Returns:
            ``(logits, enc_self_attns, dec_self_attns, dec_enc_attns)``.
            ``logits`` is flattened to two dimensions with the target
            vocabulary on the last axis; the remaining items are the per-layer
            attention-weight lists from the encoder and decoder.
        """
        enc_outputs, enc_self_attns = self.encoder(enc_inputs)
        dec_outputs, dec_self_attns, dec_enc_attns = self.decoder(
            dec_inputs, enc_inputs, enc_outputs
        )
        # dec_logits : [batch_size x tgt_len x tgt_vocab_size]
        dec_logits = self.projection(dec_outputs)
        # Collapse batch and time so the result can feed CrossEntropyLoss.
        flat_logits = dec_logits.view(-1, dec_logits.size(-1))
        return flat_logits, enc_self_attns, dec_self_attns, dec_enc_attns
def greedy_decoder(model, enc_input, start_symbol, max_len=5):
    """Generate the decoder input greedily, one token at a time.

    A greedy decoder is beam search with K=1. At inference time the target
    sequence is unknown, so the decoder input is built word by word: each
    step feeds the tokens produced so far and appends the most likely next
    token.
    Starting Reference: http://nlp.seas.harvard.edu/2018/04/03/attention.html#greedy-decoding

    :param model: Transformer model exposing ``encoder``, ``decoder`` and ``projection``
    :param enc_input: The encoder input (a batch containing one sequence)
    :param start_symbol: Vocabulary index of the start symbol ('S' in this example)
    :param max_len: Number of decoder positions to fill. Defaults to 5, the
        length that used to be hard-coded here; it must not exceed what the
        model's decoder supports.
    :return: The decoder input, a ``[1 x max_len]`` tensor of the same type as ``enc_input``
    """
    # Inference only: no autograd graph is needed for the repeated passes.
    with torch.no_grad():
        enc_outputs, _ = model.encoder(enc_input)
        # Positions not generated yet stay 0, i.e. the PAD index.
        dec_input = torch.zeros(1, max_len).type_as(enc_input.data)
        next_symbol = start_symbol
        for i in range(max_len):
            dec_input[0][i] = next_symbol
            dec_outputs, _, _ = model.decoder(dec_input, enc_input, enc_outputs)
            projected = model.projection(dec_outputs)
            # Most likely token at every position; only position i is consumed.
            predicted = projected.squeeze(0).max(dim=-1, keepdim=False)[1]
            next_symbol = predicted[i].item()
    return dec_input
def showgraph(attn):
    """Plot the first head of the last layer's attention weights.

    Reads the module-level ``n_heads`` (figure size) and ``sentences`` (tick
    labels).

    :param attn: list of per-layer attention tensors; only the last entry is
        plotted.
    """
    # Last layer -> drop the batch axis (batch of one) -> first head.
    attn = attn[-1].squeeze(0)[0]
    attn = attn.squeeze(0).detach().numpy()
    fig = plt.figure(figsize=(n_heads, n_heads))  # [n_heads, n_heads]
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(attn, cmap='viridis')
    # NOTE(review): the same labels are used for every plot (source words on
    # x, target words on y), whichever attention matrix is shown - confirm
    # that this is intended.
    x_labels = sentences[0].split()
    y_labels = sentences[2].split()
    # Fix the tick positions before labelling them; labelling the default
    # ticks (offset by a leading '' entry) depends on matplotlib's locator.
    ax.set_xticks(list(range(len(x_labels))))
    ax.set_yticks(list(range(len(y_labels))))
    ax.set_xticklabels(x_labels, fontdict={'fontsize': 14}, rotation=90)
    ax.set_yticklabels(y_labels, fontdict={'fontsize': 14})
    plt.show()
if __name__ == '__main__':
    # [encoder input, decoder input, expected decoder output]
    sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']

    # Transformer Parameters
    # Padding must be index zero: get_attn_pad_mask treats token id 0 as PAD.
    src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4}
    src_vocab_size = len(src_vocab)

    tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6}
    # Invert the mapping explicitly instead of relying on dict iteration
    # order happening to match the index values.
    number_dict = {index: word for word, index in tgt_vocab.items()}
    tgt_vocab_size = len(tgt_vocab)

    src_len = 5  # length of source
    tgt_len = 5  # length of target

    d_model = 512  # Embedding Size
    d_ff = 2048  # FeedForward dimension
    d_k = d_v = 64  # dimension of K(=Q), V
    n_layers = 6  # number of Encoder and Decoder layers
    n_heads = 8  # number of heads in Multi-Head Attention

    model = Transformer()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    enc_inputs, dec_inputs, target_batch = make_batch()

    for epoch in range(20):
        optimizer.zero_grad()
        outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)
        loss = criterion(outputs, target_batch.contiguous().view(-1))
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))
        loss.backward()
        optimizer.step()

    # Test: build the decoder input greedily, then run one full forward pass.
    greedy_dec_input = greedy_decoder(model, enc_inputs, start_symbol=tgt_vocab["S"])
    predict, _, _, _ = model(enc_inputs, greedy_dec_input)
    predict = predict.data.max(1, keepdim=True)[1]
    print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])

    # The attention weights plotted below come from the last training step.
    print('first head of last state enc_self_attns')
    showgraph(enc_self_attns)

    print('first head of last state dec_self_attns')
    showgraph(dec_self_attns)

    print('first head of last state dec_enc_attns')
    showgraph(dec_enc_attns)
================================================
FILE: 5-1.Transformer/Transformer.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"metadata": {},
"source": [
"# code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612\n",
"# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\n",
"# https://github.com/JayParks/transformer\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# S: Symbol that shows starting of decoding input\n",
"# E: Symbol that marks the end of the decoding output\n",
"# P: Padding symbol used to fill a sequence that is shorter than the number of time steps\n",
"\n",
"def make_batch(sentences):\n",
" input_batch = [[src_vocab[n] for n in sentences[0].split()]]\n",
" output_batch = [[tgt_vocab[n] for n in sentences[1].split()]]\n",
" target_batch = [[tgt_vocab[n] for n in sentences[2].split()]]\n",
" return torch.LongTensor(input_batch), torch.LongTensor(output_batch), torch.LongTensor(target_batch)\n",
"\n",
"def get_sinusoid_encoding_table(n_position, d_model):\n",
" def cal_angle(position, hid_idx):\n",
" return position / np.power(10000, 2 * (hid_idx // 2) / d_model)\n",
" def get_posi_angle_vec(position):\n",
" return [cal_angle(position, hid_j) for hid_j in range(d_model)]\n",
"\n",
" sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)])\n",
" sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i\n",
" sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1\n",
" return torch.FloatTensor(sinusoid_table)\n",
"\n",
"def get_attn_pad_mask(seq_q, seq_k):\n",
" batch_size, len_q = seq_q.size()\n",
" batch_size, len_k = seq_k.size()\n",
" # eq(zero) is PAD token\n",
" pad_attn_mask = seq_k.data.eq(0).unsqueeze(1) # batch_size x 1 x len_k(=len_q), one is masking\n",
" return pad_attn_mask.expand(batch_size, len_q, len_k) # batch_size x len_q x len_k\n",
"\n",
"def get_attn_subsequent_mask(seq):\n",
" attn_shape = [seq.size(0), seq.size(1), seq.size(1)]\n",
" subsequent_mask = np.triu(np.ones(attn_shape), k=1)\n",
" subsequent_mask = torch.from_numpy(subsequent_mask).byte()\n",
" return subsequent_mask\n",
"\n",
"class ScaledDotProductAttention(nn.Module):\n",
" def __init__(self):\n",
" super(ScaledDotProductAttention, self).__init__()\n",
"\n",
" def forward(self, Q, K, V, attn_mask):\n",
" scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n",
" scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\n",
" attn = nn.Softmax(dim=-1)(scores)\n",
" context = torch.matmul(attn, V)\n",
" return context, attn\n",
"\n",
"class MultiHeadAttention(nn.Module):\n",
" def __init__(self):\n",
" super(MultiHeadAttention, self).__init__()\n",
" self.W_Q = nn.Linear(d_model, d_k * n_heads)\n",
" self.W_K = nn.Linear(d_model, d_k * n_heads)\n",
" self.W_V = nn.Linear(d_model, d_v * n_heads)\n",
" self.linear = nn.Linear(n_heads * d_v, d_model)\n",
" self.layer_norm = nn.LayerNorm(d_model)\n",
"\n",
" def forward(self, Q, K, V, attn_mask):\n",
" # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\n",
" residual, batch_size = Q, Q.size(0)\n",
" # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\n",
" q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2) # q_s: [batch_size x n_heads x len_q x d_k]\n",
" k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2) # k_s: [batch_size x n_heads x len_k x d_k]\n",
" v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2) # v_s: [batch_size x n_heads x len_k x d_v]\n",
"\n",
" attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k]\n",
"\n",
" # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n",
" context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)\n",
" context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\n",
" output = self.linear(context)\n",
" return self.layer_norm(output + residual), attn # output: [batch_size x len_q x d_model]\n",
"\n",
"class PoswiseFeedForwardNet(nn.Module):\n",
" def __init__(self):\n",
" super(PoswiseFeedForwardNet, self).__init__()\n",
" self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)\n",
" self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)\n",
" self.layer_norm = nn.LayerNorm(d_model)\n",
"\n",
" def forward(self, inputs):\n",
" residual = inputs # inputs : [batch_size, len_q, d_model]\n",
" output = nn.ReLU()(self.conv1(inputs.transpose(1, 2)))\n",
" output = self.conv2(output).transpose(1, 2)\n",
" return self.layer_norm(output + residual)\n",
"\n",
"class EncoderLayer(nn.Module):\n",
" def __init__(self):\n",
" super(EncoderLayer, self).__init__()\n",
" self.enc_self_attn = MultiHeadAttention()\n",
" self.pos_ffn = PoswiseFeedForwardNet()\n",
"\n",
" def forward(self, enc_inputs, enc_self_attn_mask):\n",
" enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\n",
" enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\n",
" return enc_outputs, attn\n",
"\n",
"class DecoderLayer(nn.Module):\n",
" def __init__(self):\n",
" super(DecoderLayer, self).__init__()\n",
" self.dec_self_attn = MultiHeadAttention()\n",
" self.dec_enc_attn = MultiHeadAttention()\n",
" self.pos_ffn = PoswiseFeedForwardNet()\n",
"\n",
" def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):\n",
" dec_outputs, dec_self_attn = self.dec_self_attn(dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask)\n",
" dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)\n",
" dec_outputs = self.pos_ffn(dec_outputs)\n",
" return dec_outputs, dec_self_attn, dec_enc_attn\n",
"\n",
"class Encoder(nn.Module):\n",
" def __init__(self):\n",
" super(Encoder, self).__init__()\n",
" self.src_emb = nn.Embedding(src_vocab_size, d_model)\n",
" self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model),freeze=True)\n",
" self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\n",
"\n",
" def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]\n",
" enc_outputs = self.src_emb(enc_inputs) + self.pos_emb(torch.LongTensor([[1,2,3,4,0]]))\n",
" enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)\n",
" enc_self_attns = []\n",
" for layer in self.layers:\n",
" enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)\n",
" enc_self_attns.append(enc_self_attn)\n",
" return enc_outputs, enc_self_attns\n",
"\n",
"class Decoder(nn.Module):\n",
" def __init__(self):\n",
" super(Decoder, self).__init__()\n",
" self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)\n",
" self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model),freeze=True)\n",
" self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])\n",
"\n",
" def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs : [batch_size x target_len]\n",
" dec_outputs = self.tgt_emb(dec_inputs) + self.pos_emb(torch.LongTensor([[5,1,2,3,4]]))\n",
" dec_self_attn_pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs)\n",
" dec_self_attn_subsequent_mask = get_attn_subsequent_mask(dec_inputs)\n",
" dec_self_attn_mask = torch.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)\n",
"\n",
" dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs)\n",
"\n",
" dec_self_attns, dec_enc_attns = [], []\n",
" for layer in self.layers:\n",
" dec_outputs, dec_self_attn, dec_enc_attn = layer(dec_outputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask)\n",
" dec_self_attns.append(dec_self_attn)\n",
" dec_enc_attns.append(dec_enc_attn)\n",
" return dec_outputs, dec_self_attns, dec_enc_attns\n",
"\n",
"class Transformer(nn.Module):\n",
" def __init__(self):\n",
" super(Transformer, self).__init__()\n",
" self.encoder = Encoder()\n",
" self.decoder = Decoder()\n",
" self.projection = nn.Linear(d_model, tgt_vocab_size, bias=False)\n",
" def forward(self, enc_inputs, dec_inputs):\n",
" enc_outputs, enc_self_attns = self.encoder(enc_inputs)\n",
" dec_outputs, dec_self_attns, dec_enc_attns = self.decoder(dec_inputs, enc_inputs, enc_outputs)\n",
" dec_logits = self.projection(dec_outputs) # dec_logits : [batch_size x src_vocab_size x tgt_vocab_size]\n",
" return dec_logits.view(-1, dec_logits.size(-1)), enc_self_attns, dec_self_attns, dec_enc_attns\n",
"\n",
"def showgraph(attn):\n",
" attn = attn[-1].squeeze(0)[0]\n",
" attn = attn.squeeze(0).data.numpy()\n",
" fig = plt.figure(figsize=(n_heads, n_heads)) # [n_heads, n_heads]\n",
" ax = fig.add_subplot(1, 1, 1)\n",
" ax.matshow(attn, cmap='viridis')\n",
" ax.set_xticklabels(['']+sentences[0].split(), fontdict={'fontsize': 14}, rotation=90)\n",
" ax.set_yticklabels(['']+sentences[2].split(), fontdict={'fontsize': 14})\n",
" plt.show()\n",
"\n",
"if __name__ == '__main__':\n",
" sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\n",
"\n",
" # Transformer Parameters\n",
" # Padding Should be Zero\n",
" src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4}\n",
" src_vocab_size = len(src_vocab)\n",
"\n",
" tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6}\n",
" number_dict = {i: w for i, w in enumerate(tgt_vocab)}\n",
" tgt_vocab_size = len(tgt_vocab)\n",
"\n",
" src_len = 5 # length of source\n",
" tgt_len = 5 # length of target\n",
"\n",
" d_model = 512 # Embedding Size\n",
" d_ff = 2048 # FeedForward dimension\n",
" d_k = d_v = 64 # dimension of K(=Q), V\n",
" n_layers = 6 # number of Encoder of Decoder Layer\n",
" n_heads = 8 # number of heads in Multi-Head Attention\n",
"\n",
" model = Transformer()\n",
"\n",
" criterion = nn.CrossEntropyLoss()\n",
" optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
"\n",
" enc_inputs, dec_inputs, target_batch = make_batch(sentences)\n",
"\n",
" for epoch in range(20):\n",
" optimizer.zero_grad()\n",
" outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)\n",
" loss = criterion(outputs, target_batch.contiguous().view(-1))\n",
" print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" # Test\n",
" predict, _, _, _ = model(enc_inputs, dec_inputs)\n",
" predict = predict.data.max(1, keepdim=True)[1]\n",
" print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\n",
"\n",
" print('first head of last state enc_self_attns')\n",
" showgraph(enc_self_attns)\n",
"\n",
" print('first head of last state dec_self_attns')\n",
" showgraph(dec_self_attns)\n",
"\n",
" print('first head of last state dec_enc_attns')\n",
" showgraph(dec_enc_attns)"
],
"outputs": [],
"execution_count": null
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
================================================
FILE: 5-1.Transformer/Transformer.py
================================================
# %%
# code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612
# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch
# https://github.com/JayParks/transformer
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
# S: Symbol that shows starting of decoding input
# E: Symbol that marks the end of the decoding output
# P: Padding symbol used to fill a sequence that is shorter than the number of time steps
def make_batch(sentences):
    """Encode a ``[source, decoder input, target]`` sentence triple as index tensors.

    Words are looked up in the module-level ``src_vocab`` (source) and
    ``tgt_vocab`` (decoder input and target).

    Args:
        sentences: sequence whose first three items are whitespace-separated
            sentences.

    Returns:
        A tuple ``(encoder_input, decoder_input, target)`` of ``LongTensor``s,
        each holding a single row (a batch of one) of vocabulary indices.
    """
    encoder_ids = [src_vocab[word] for word in sentences[0].split()]
    decoder_ids = [tgt_vocab[word] for word in sentences[1].split()]
    target_ids = [tgt_vocab[word] for word in sentences[2].split()]
    # Wrap each sequence in an outer list so the tensors carry a batch axis.
    return (
        torch.LongTensor([encoder_ids]),
        torch.LongTensor([decoder_ids]),
        torch.LongTensor([target_ids]),
    )
def get_sinusoid_encoding_table(n_position, d_model):
    """Build the fixed sinusoidal positional-encoding table.

    Entry ``(pos, j)`` starts as the angle
    ``pos / 10000 ** (2 * (j // 2) / d_model)``; even columns are then mapped
    through ``sin`` and odd columns through ``cos``.

    Args:
        n_position: number of positions (rows of the table).
        d_model: width of each encoding (columns of the table).

    Returns:
        A ``torch.FloatTensor`` of shape ``[n_position, d_model]``.
    """
    def angle(pos, dim):
        # Columns 2i and 2i+1 share one frequency, hence the ``dim // 2``.
        return pos / np.power(10000, 2 * (dim // 2) / d_model)

    rows = []
    for pos in range(n_position):
        rows.append([angle(pos, dim) for dim in range(d_model)])
    table = np.array(rows)
    table[:, 0::2] = np.sin(table[:, 0::2])  # dim 2i
    table[:, 1::2] = np.cos(table[:, 1::2])  # dim 2i+1
    return torch.FloatTensor(table)
def get_attn_pad_mask(seq_q, seq_k):
    """Build an attention mask that hides PAD keys from every query.

    Args:
        seq_q: 2-D tensor of query token ids, ``[batch_size x len_q]``.
        seq_k: 2-D tensor of key token ids, ``[batch_size x len_k]``.

    Returns:
        A tensor of shape ``[batch_size x len_q x len_k]`` that is true
        wherever the key token id equals 0 (the PAD index). The batch size is
        taken from ``seq_k``.
    """
    _, len_q = seq_q.size()
    batch_size, len_k = seq_k.size()
    key_is_pad = seq_k.data.eq(0)  # token id 0 is the PAD token
    # batch_size x 1 x len_k, broadcast over every query position.
    return key_is_pad.unsqueeze(1).expand(batch_size, len_q, len_k)
def get_attn_subsequent_mask(seq):
    """Build the causal mask that hides later positions from earlier ones.

    Args:
        seq: tensor whose first two dimensions are ``batch_size`` and
            ``seq_len``.

    Returns:
        A ``uint8`` tensor of shape ``[batch_size x seq_len x seq_len]`` with
        ones strictly above the main diagonal and zeros elsewhere.
    """
    batch_size, seq_len = seq.size(0), seq.size(1)
    all_ones = torch.ones(batch_size, seq_len, seq_len)
    # diagonal=1 keeps only entries with column > row, i.e. future positions.
    return all_ones.triu(diagonal=1).byte()
class ScaledDotProductAttention(nn.Module):
    """Scaled dot-product attention: ``softmax(Q K^T / sqrt(d_k)) V``."""

    def __init__(self):
        super(ScaledDotProductAttention, self).__init__()

    def forward(self, Q, K, V, attn_mask):
        """Attend from queries ``Q`` over keys ``K`` and values ``V``.

        Args:
            Q: queries, ``[... x len_q x d_k]``.
            K: keys, ``[... x len_k x d_k]``.
            V: values, ``[... x len_k x d_v]``.
            attn_mask: boolean mask broadcastable to ``[... x len_q x len_k]``;
                true entries are excluded from attention.

        Returns:
            ``(context, attn)`` where ``attn`` holds the attention weights
            ``[... x len_q x len_k]`` and ``context`` is ``attn @ V``.
        """
        # Take the scale from the key width instead of a module-level ``d_k``
        # so the layer also works when this file is imported rather than run.
        d_k = K.size(-1)
        scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k)
        # Masked positions get a large negative score, i.e. ~0 after softmax.
        scores.masked_fill_(attn_mask, -1e9)
        attn = torch.softmax(scores, dim=-1)
        context = torch.matmul(attn, V)
        return context, attn
class MultiHeadAttention(nn.Module):
    """Multi-head attention followed by a residual connection and LayerNorm.

    Layer sizes are read from the module-level ``d_model``, ``d_k``, ``d_v``
    and ``n_heads``.
    """

    def __init__(self):
        super(MultiHeadAttention, self).__init__()
        self.W_Q = nn.Linear(d_model, d_k * n_heads)
        self.W_K = nn.Linear(d_model, d_k * n_heads)
        self.W_V = nn.Linear(d_model, d_v * n_heads)
        self.linear = nn.Linear(n_heads * d_v, d_model)
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, Q, K, V, attn_mask):
        """Run multi-head attention.

        Args:
            Q: ``[batch_size x len_q x d_model]``.
            K: ``[batch_size x len_k x d_model]``.
            V: ``[batch_size x len_k x d_model]``.
            attn_mask: ``[batch_size x len_q x len_k]``; true entries are
                masked out.

        Returns:
            ``(output, attn)`` with ``output`` of shape
            ``[batch_size x len_q x d_model]`` and ``attn`` of shape
            ``[batch_size x n_heads x len_q x len_k]``.
        """
        batch_size = Q.size(0)
        residual = Q

        def split_heads(projected, head_dim):
            # (B, S, H*W) -split-> (B, S, H, W) -trans-> (B, H, S, W)
            return projected.view(batch_size, -1, n_heads, head_dim).transpose(1, 2)

        q_s = split_heads(self.W_Q(Q), d_k)  # [batch_size x n_heads x len_q x d_k]
        k_s = split_heads(self.W_K(K), d_k)  # [batch_size x n_heads x len_k x d_k]
        v_s = split_heads(self.W_V(V), d_v)  # [batch_size x n_heads x len_k x d_v]

        # The same mask is applied to every head.
        per_head_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1)

        context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, per_head_mask)
        # Merge the heads back: [batch_size x len_q x n_heads * d_v]
        merged = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v)
        projected = self.linear(merged)
        return self.layer_norm(projected + residual), attn
class PoswiseFeedForwardNet(nn.Module):
    """Position-wise feed-forward block with residual connection and LayerNorm.

    Built from two kernel-size-1 convolutions (``d_model -> d_ff -> d_model``);
    ``d_model`` and ``d_ff`` are module-level settings.
    """

    def __init__(self):
        super(PoswiseFeedForwardNet, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, inputs):
        """Transform ``inputs`` of shape ``[batch_size x len_q x d_model]``.

        Returns a tensor of the same shape.
        """
        # Conv1d expects channels first, so move d_model to axis 1 and back.
        channels_first = inputs.transpose(1, 2)
        hidden = torch.relu(self.conv1(channels_first))
        transformed = self.conv2(hidden).transpose(1, 2)
        return self.layer_norm(transformed + inputs)
class EncoderLayer(nn.Module):
    """One encoder layer: self-attention followed by the feed-forward block."""

    def __init__(self):
        super(EncoderLayer, self).__init__()
        self.enc_self_attn = MultiHeadAttention()
        self.pos_ffn = PoswiseFeedForwardNet()

    def forward(self, enc_inputs, enc_self_attn_mask):
        """Apply the layer.

        Args:
            enc_inputs: ``[batch_size x len_q x d_model]``.
            enc_self_attn_mask: mask handed to the self-attention sub-layer.

        Returns:
            ``(enc_outputs, attn)``: the layer output and the self-attention
            weights.
        """
        # Self-attention: the same tensor serves as Q, K and V.
        attended, attn = self.enc_self_attn(
            enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask
        )
        return self.pos_ffn(attended), attn
class DecoderLayer(nn.Module):
    """One decoder layer: self-attention, encoder-decoder attention, feed-forward."""

    def __init__(self):
        super(DecoderLayer, self).__init__()
        self.dec_self_attn = MultiHeadAttention()
        self.dec_enc_attn = MultiHeadAttention()
        self.pos_ffn = PoswiseFeedForwardNet()

    def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):
        """Apply the layer.

        Args:
            dec_inputs: decoder-side hidden states.
            enc_outputs: encoder output, used as keys and values of the
                encoder-decoder attention.
            dec_self_attn_mask: mask for the decoder self-attention.
            dec_enc_attn_mask: mask for the encoder-decoder attention.

        Returns:
            ``(dec_outputs, dec_self_attn, dec_enc_attn)``: the layer output
            and the attention weights of both attention sub-layers.
        """
        self_attended, dec_self_attn = self.dec_self_attn(
            dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask
        )
        # Queries come from the decoder; keys and values from the encoder.
        cross_attended, dec_enc_attn = self.dec_enc_attn(
            self_attended, enc_outputs, enc_outputs, dec_enc_attn_mask
        )
        return self.pos_ffn(cross_attended), dec_self_attn, dec_enc_attn
class Encoder(nn.Module):
    """Stack of ``n_layers`` encoder layers over token + positional embeddings.

    Sizes come from the module-level ``src_vocab_size``, ``src_len``,
    ``d_model`` and ``n_layers``.
    """

    def __init__(self):
        super(Encoder, self).__init__()
        self.src_emb = nn.Embedding(src_vocab_size, d_model)
        # Frozen sinusoid table with src_len + 1 rows: row 0 is used for PAD,
        # rows 1..src_len for real positions.
        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len + 1, d_model), freeze=True)
        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])

    def forward(self, enc_inputs):
        """Encode a batch of source sequences.

        Args:
            enc_inputs: token ids, ``[batch_size x source_len]``;
                ``source_len`` must not exceed ``src_len``.

        Returns:
            ``(enc_outputs, enc_self_attns)``: the final hidden states and a
            list with the self-attention weights of every layer.
        """
        # Derive position ids from the input instead of hard-coding them for
        # one sentence: 1-based index for real tokens, 0 for PAD (token id 0).
        # For the example "ich mochte ein bier P" this yields [1, 2, 3, 4, 0].
        seq_len = enc_inputs.size(1)
        positions = torch.arange(1, seq_len + 1, device=enc_inputs.device)
        positions = positions.unsqueeze(0).expand_as(enc_inputs)
        positions = positions.masked_fill(enc_inputs.eq(0), 0)

        enc_outputs = self.src_emb(enc_inputs) + self.pos_emb(positions)
        enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)
        enc_self_attns = []
        for layer in self.layers:
            enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)
            enc_self_attns.append(enc_self_attn)
        return enc_outputs, enc_self_attns
class Decoder(nn.Module):
    """Stack of ``n_layers`` decoder layers over token + positional embeddings.

    Sizes come from the module-level ``tgt_vocab_size``, ``tgt_len``,
    ``d_model`` and ``n_layers``.
    """

    def __init__(self):
        super(Decoder, self).__init__()
        self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)
        # Frozen sinusoid table with tgt_len + 1 rows: row 0 is used for PAD,
        # rows 1..tgt_len for real positions.
        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len + 1, d_model), freeze=True)
        self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])

    def forward(self, dec_inputs, enc_inputs, enc_outputs):
        """Decode against the encoder output.

        Args:
            dec_inputs: target-side token ids, ``[batch_size x target_len]``;
                ``target_len`` must not exceed ``tgt_len``.
            enc_inputs: source token ids, used only to mask PAD keys in the
                encoder-decoder attention.
            enc_outputs: encoder hidden states.

        Returns:
            ``(dec_outputs, dec_self_attns, dec_enc_attns)``: the final hidden
            states and per-layer lists of both kinds of attention weights.
        """
        # Position ids follow sequence order: 1-based index for real tokens,
        # 0 for PAD (token id 0). The previous hard-coded [5, 1, 2, 3, 4] was
        # the token ids of "S i want a beer", which gave the first token
        # position 5.
        seq_len = dec_inputs.size(1)
        positions = torch.arange(1, seq_len + 1, device=dec_inputs.device)
        positions = positions.unsqueeze(0).expand_as(dec_inputs)
        positions = positions.masked_fill(dec_inputs.eq(0), 0)

        dec_outputs = self.tgt_emb(dec_inputs) + self.pos_emb(positions)

        # A key is hidden from self-attention if it is PAD or lies in the future.
        dec_self_attn_pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs)
        dec_self_attn_subsequent_mask = get_attn_subsequent_mask(dec_inputs)
        dec_self_attn_mask = torch.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)

        dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs)

        dec_self_attns, dec_enc_attns = [], []
        for layer in self.layers:
            dec_outputs, dec_self_attn, dec_enc_attn = layer(dec_outputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask)
            dec_self_attns.append(dec_self_attn)
            dec_enc_attns.append(dec_enc_attn)
        return dec_outputs, dec_self_attns, dec_enc_attns
class Transformer(nn.Module):
    """Encoder-decoder model with a bias-free projection onto the target vocabulary."""

    def __init__(self):
        super(Transformer, self).__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()
        self.projection = nn.Linear(d_model, tgt_vocab_size, bias=False)

    def forward(self, enc_inputs, dec_inputs):
        """Run the full model.

        Args:
            enc_inputs: source token ids.
            dec_inputs: decoder-input token ids.

        Returns:
            ``(logits, enc_self_attns, dec_self_attns, dec_enc_attns)``.
            ``logits`` is flattened to two dimensions with the target
            vocabulary on the last axis; the remaining items are the per-layer
            attention-weight lists from the encoder and decoder.
        """
        enc_outputs, enc_self_attns = self.encoder(enc_inputs)
        dec_outputs, dec_self_attns, dec_enc_attns = self.decoder(
            dec_inputs, enc_inputs, enc_outputs
        )
        # dec_logits : [batch_size x tgt_len x tgt_vocab_size]
        dec_logits = self.projection(dec_outputs)
        # Collapse batch and time so the result can feed CrossEntropyLoss.
        flat_logits = dec_logits.view(-1, dec_logits.size(-1))
        return flat_logits, enc_self_attns, dec_self_attns, dec_enc_attns
def showgraph(attn):
    """Draw one attention map as a heat map.

    attn: sequence of attention tensors; only the last element is used, and from
    it the entry at index 0 after squeezing dimension 0 (the first head when the
    batch size is 1 -- TODO confirm against the caller).
    """
    last_entry = attn[-1]
    first_head = last_entry.squeeze(0)[0]
    weights = first_head.squeeze(0).data.numpy()

    fig = plt.figure(figsize=(n_heads, n_heads))  # figure size: [n_heads, n_heads]
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(weights, cmap='viridis')

    # Tick labels come from the module-level sentences; a leading '' is prepended
    # to each label list.
    column_labels = [''] + sentences[0].split()
    row_labels = [''] + sentences[2].split()
    ax.set_xticklabels(column_labels, fontdict={'fontsize': 14}, rotation=90)
    ax.set_yticklabels(row_labels, fontdict={'fontsize': 14})
    plt.show()
if __name__ == '__main__':
    # Three sentences handed to make_batch: the first is the source, the other
    # two share the target words with 'S' prepended / 'E' appended.
    sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']

    # Vocabularies -- the padding symbol 'P' must map to index 0.
    src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4}
    src_vocab_size = len(src_vocab)

    tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6}
    number_dict = dict(enumerate(tgt_vocab))  # index -> target word
    tgt_vocab_size = len(tgt_vocab)

    # Transformer hyper-parameters (read as module-level globals by the model classes).
    src_len = 5     # length of source
    tgt_len = 5     # length of target
    d_model = 512   # embedding size
    d_ff = 2048     # feed-forward dimension
    d_k = 64        # dimension of K (= Q)
    d_v = 64        # dimension of V
    n_layers = 6    # number of encoder or decoder layers
    n_heads = 8     # number of heads in multi-head attention

    model = Transformer()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    enc_inputs, dec_inputs, target_batch = make_batch(sentences)

    # Training: 20 full-batch steps on the single sentence pair.
    for epoch in range(20):
        optimizer.zero_grad()
        outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)
        flat_targets = target_batch.contiguous().view(-1)
        loss = criterion(outputs, flat_targets)
        print('Epoch:', f'{epoch + 1:04d}', 'cost =', f'{loss:.6f}')
        loss.backward()
        optimizer.step()

    # Test: re-run the model on the training pair and take the arg-max word per row.
    predict = model(enc_inputs, dec_inputs)[0]
    max_result = predict.data.max(1, keepdim=True)
    predict = max_result[1]
    predicted_words = [number_dict[n.item()] for n in predict.squeeze()]
    print(sentences[0], '->', predicted_words)

    # The attention maps plotted below are the ones kept from the last training step.
    attention_plots = (
        ('first head of last state enc_self_attns', enc_self_attns),
        ('first head of last state dec_self_attns', dec_self_attns),
        ('first head of last state dec_enc_attns', dec_enc_attns),
    )
    for caption, attns in attention_plots:
        print(caption)
        showgraph(attns)
================================================
FILE: 5-2.BERT/BERT.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"metadata": {},
"source": [
"# code by Tae Hwan Jung(Jeff Jung) @graykode\n",
"# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\n",
"# https://github.com/JayParks/transformer, https://github.com/dhlee347/pytorchic-bert\n",
"import math\n",
"import re\n",
"from random import *\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"\n",
"# sample IsNext and NotNext to be same in small batch size\n",
"def make_batch():\n",
" batch = []\n",
" positive = negative = 0\n",
" while positive != batch_size/2 or negative != batch_size/2:\n",
" tokens_a_index, tokens_b_index= randrange(len(sentences)), randrange(len(sentences)) # sample random index in sentences\n",
" tokens_a, tokens_b= token_list[tokens_a_index], token_list[tokens_b_index]\n",
" input_ids = [word_dict['[CLS]']] + tokens_a + [word_dict['[SEP]']] + tokens_b + [word_dict['[SEP]']]\n",
" segment_ids = [0] * (1 + len(tokens_a) + 1) + [1] * (len(tokens_b) + 1)\n",
"\n",
" # MASK LM\n",
" n_pred = min(max_pred, max(1, int(round(len(input_ids) * 0.15)))) # 15 % of tokens in one sentence\n",
" cand_maked_pos = [i for i, token in enumerate(input_ids)\n",
" if token != word_dict['[CLS]'] and token != word_dict['[SEP]']]\n",
" shuffle(cand_maked_pos)\n",
" masked_tokens, masked_pos = [], []\n",
" for pos in cand_maked_pos[:n_pred]:\n",
" masked_pos.append(pos)\n",
" masked_tokens.append(input_ids[pos])\n",
" if random() < 0.8: # 80%\n",
" input_ids[pos] = word_dict['[MASK]'] # make mask\n",
" elif random() < 0.5: # 10%\n",
" index = randint(0, vocab_size - 1) # random index in vocabulary\n",
" input_ids[pos] = word_dict[number_dict[index]] # replace\n",
"\n",
" # Zero Paddings\n",
" n_pad = maxlen - len(input_ids)\n",
" input_ids.extend([0] * n_pad)\n",
" segment_ids.extend([0] * n_pad)\n",
"\n",
" # Zero Padding (100% - 15%) tokens\n",
" if max_pred > n_pred:\n",
" n_pad = max_pred - n_pred\n",
" masked_tokens.extend([0] * n_pad)\n",
" masked_pos.extend([0] * n_pad)\n",
"\n",
" if tokens_a_index + 1 == tokens_b_index and positive < batch_size/2:\n",
" batch.append([input_ids, segment_ids, masked_tokens, masked_pos, True]) # IsNext\n",
" positive += 1\n",
" elif tokens_a_index + 1 != tokens_b_index and negative < batch_size/2:\n",
" batch.append([input_ids, segment_ids, masked_tokens, masked_pos, False]) # NotNext\n",
" negative += 1\n",
" return batch\n",
"# Proprecessing Finished\n",
"\n",
"def get_attn_pad_mask(seq_q, seq_k):\n",
" batch_size, len_q = seq_q.size()\n",
" batch_size, len_k = seq_k.size()\n",
" # eq(zero) is PAD token\n",
" pad_attn_mask = seq_k.data.eq(0).unsqueeze(1) # batch_size x 1 x len_k(=len_q), one is masking\n",
" return pad_attn_mask.expand(batch_size, len_q, len_k) # batch_size x len_q x len_k\n",
"\n",
"def gelu(x):\n",
" \"Implementation of the gelu activation function by Hugging Face\"\n",
" return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))\n",
"\n",
"class Embedding(nn.Module):\n",
" def __init__(self):\n",
" super(Embedding, self).__init__()\n",
" self.tok_embed = nn.Embedding(vocab_size, d_model) # token embedding\n",
" self.pos_embed = nn.Embedding(maxlen, d_model) # position embedding\n",
" self.seg_embed = nn.Embedding(n_segments, d_model) # segment(token type) embedding\n",
" self.norm = nn.LayerNorm(d_model)\n",
"\n",
" def forward(self, x, seg):\n",
" seq_len = x.size(1)\n",
" pos = torch.arange(seq_len, dtype=torch.long)\n",
" pos = pos.unsqueeze(0).expand_as(x) # (seq_len,) -> (batch_size, seq_len)\n",
" embedding = self.tok_embed(x) + self.pos_embed(pos) + self.seg_embed(seg)\n",
" return self.norm(embedding)\n",
"\n",
"class ScaledDotProductAttention(nn.Module):\n",
" def __init__(self):\n",
" super(ScaledDotProductAttention, self).__init__()\n",
"\n",
" def forward(self, Q, K, V, attn_mask):\n",
" scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n",
" scores.masked_fill_(attn_mask, -1e9) #
gitextract_mxhf1sqm/
├── .github/
│ └── workflows/
│ └── python-app.yml
├── .gitignore
├── 1-1.NNLM/
│ ├── NNLM.ipynb
│ └── NNLM.py
├── 1-2.Word2Vec/
│ ├── Word2Vec-Skipgram(Softmax).ipynb
│ └── Word2Vec-Skipgram(Softmax).py
├── 1-3.FastText/
│ ├── FastText.ipynb
│ ├── test.txt
│ └── train.txt
├── 2-1.TextCNN/
│ ├── TextCNN.ipynb
│ └── TextCNN.py
├── 3-1.TextRNN/
│ ├── TextRNN.ipynb
│ └── TextRNN.py
├── 3-2.TextLSTM/
│ ├── TextLSTM.ipynb
│ └── TextLSTM.py
├── 3-3.Bi-LSTM/
│ ├── Bi-LSTM.ipynb
│ └── Bi-LSTM.py
├── 4-1.Seq2Seq/
│ ├── Seq2Seq.ipynb
│ └── Seq2Seq.py
├── 4-2.Seq2Seq(Attention)/
│ ├── Seq2Seq(Attention).ipynb
│ └── Seq2Seq(Attention).py
├── 4-3.Bi-LSTM(Attention)/
│ ├── Bi-LSTM(Attention).ipynb
│ └── Bi-LSTM(Attention).py
├── 5-1.Transformer/
│ ├── Transformer(Greedy_decoder).ipynb
│ ├── Transformer(Greedy_decoder).py
│ ├── Transformer.ipynb
│ └── Transformer.py
├── 5-2.BERT/
│ ├── BERT.ipynb
│ └── BERT.py
├── CONTRIBUTING.md
├── LICENSE
├── README.md
└── archive/
└── tensorflow/
└── v1/
├── 1-1.NNLM/
│ └── NNLM.py
├── 1-2.Word2Vec/
│ ├── Word2Vec-Skipgram(NCE_loss).py
│ └── Word2Vec-Skipgram(Softmax).py
├── 2-1.TextCNN/
│ └── TextCNN.py
├── 3-1.TextRNN/
│ └── TextRNN.py
├── 3-2.TextLSTM/
│ └── TextLSTM.py
├── 3-3.Bi-LSTM/
│ └── Bi-LSTM.py
├── 4-1.Seq2Seq/
│ └── Seq2Seq.py
├── 4-2.Seq2Seq(Attention)/
│ └── Seq2Seq(Attention).py
└── 4-3.Bi-LSTM(Attention)/
└── Bi-LSTM(Attention).py
SYMBOL INDEX (130 symbols across 20 files)
FILE: 1-1.NNLM/NNLM.py
function make_batch (line 7) | def make_batch():
class NNLM (line 22) | class NNLM(nn.Module):
method __init__ (line 23) | def __init__(self):
method forward (line 32) | def forward(self, X):
FILE: 1-2.Word2Vec/Word2Vec-Skipgram(Softmax).py
function random_batch (line 9) | def random_batch():
class Word2Vec (line 21) | class Word2Vec(nn.Module):
method __init__ (line 22) | def __init__(self):
method forward (line 28) | def forward(self, X):
FILE: 2-1.TextCNN/TextCNN.py
class TextCNN (line 9) | class TextCNN(nn.Module):
method __init__ (line 10) | def __init__(self):
method forward (line 18) | def forward(self, X):
FILE: 3-1.TextRNN/TextRNN.py
function make_batch (line 8) | def make_batch():
class TextRNN (line 22) | class TextRNN(nn.Module):
method __init__ (line 23) | def __init__(self):
method forward (line 29) | def forward(self, hidden, X):
FILE: 3-2.TextLSTM/TextLSTM.py
function make_batch (line 8) | def make_batch():
class TextLSTM (line 19) | class TextLSTM(nn.Module):
method __init__ (line 20) | def __init__(self):
method forward (line 27) | def forward(self, X):
FILE: 3-3.Bi-LSTM/Bi-LSTM.py
function make_batch (line 8) | def make_batch():
class BiLSTM (line 22) | class BiLSTM(nn.Module):
method __init__ (line 23) | def __init__(self):
method forward (line 30) | def forward(self, X):
FILE: 4-1.Seq2Seq/Seq2Seq.py
function make_batch (line 11) | def make_batch():
function make_testbatch (line 30) | def make_testbatch(input_word):
class Seq2Seq (line 43) | class Seq2Seq(nn.Module):
method __init__ (line 44) | def __init__(self):
method forward (line 51) | def forward(self, enc_input, enc_hidden, dec_input):
function translate (line 102) | def translate(word):
FILE: 4-2.Seq2Seq(Attention)/Seq2Seq(Attention).py
function make_batch (line 14) | def make_batch():
class Attention (line 22) | class Attention(nn.Module):
method __init__ (line 23) | def __init__(self):
method forward (line 32) | def forward(self, enc_inputs, hidden, dec_inputs):
method get_att_weight (line 61) | def get_att_weight(self, dec_output, enc_outputs): # get attention we...
method get_att_score (line 71) | def get_att_score(self, dec_output, enc_output): # enc_outputs [batch...
FILE: 4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).py
class BiLSTM_Attention (line 11) | class BiLSTM_Attention(nn.Module):
method __init__ (line 12) | def __init__(self):
method attention_net (line 20) | def attention_net(self, lstm_output, final_state):
method forward (line 28) | def forward(self, X):
FILE: 5-1.Transformer/Transformer(Greedy_decoder).py
function make_batch (line 15) | def make_batch():
function get_sinusoid_encoding_table (line 21) | def get_sinusoid_encoding_table(n_position, d_model):
function get_attn_pad_mask (line 32) | def get_attn_pad_mask(seq_q, seq_k):
function get_attn_subsequent_mask (line 40) | def get_attn_subsequent_mask(seq):
class ScaledDotProductAttention (line 46) | class ScaledDotProductAttention(nn.Module):
method __init__ (line 47) | def __init__(self):
method forward (line 50) | def forward(self, Q, K, V, attn_mask):
class MultiHeadAttention (line 57) | class MultiHeadAttention(nn.Module):
method __init__ (line 58) | def __init__(self):
method forward (line 66) | def forward(self, Q, K, V, attn_mask):
class PoswiseFeedForwardNet (line 82) | class PoswiseFeedForwardNet(nn.Module):
method __init__ (line 83) | def __init__(self):
method forward (line 89) | def forward(self, inputs):
class EncoderLayer (line 95) | class EncoderLayer(nn.Module):
method __init__ (line 96) | def __init__(self):
method forward (line 101) | def forward(self, enc_inputs, enc_self_attn_mask):
class DecoderLayer (line 106) | class DecoderLayer(nn.Module):
method __init__ (line 107) | def __init__(self):
method forward (line 113) | def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc...
class Encoder (line 119) | class Encoder(nn.Module):
method __init__ (line 120) | def __init__(self):
method forward (line 126) | def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]
class Decoder (line 135) | class Decoder(nn.Module):
method __init__ (line 136) | def __init__(self):
method forward (line 142) | def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs :...
class Transformer (line 157) | class Transformer(nn.Module):
method __init__ (line 158) | def __init__(self):
method forward (line 163) | def forward(self, enc_inputs, dec_inputs):
function greedy_decoder (line 169) | def greedy_decoder(model, enc_input, start_symbol):
function showgraph (line 191) | def showgraph(attn):
FILE: 5-1.Transformer/Transformer.py
function make_batch (line 15) | def make_batch(sentences):
function get_sinusoid_encoding_table (line 21) | def get_sinusoid_encoding_table(n_position, d_model):
function get_attn_pad_mask (line 32) | def get_attn_pad_mask(seq_q, seq_k):
function get_attn_subsequent_mask (line 39) | def get_attn_subsequent_mask(seq):
class ScaledDotProductAttention (line 45) | class ScaledDotProductAttention(nn.Module):
method __init__ (line 46) | def __init__(self):
method forward (line 49) | def forward(self, Q, K, V, attn_mask):
class MultiHeadAttention (line 56) | class MultiHeadAttention(nn.Module):
method __init__ (line 57) | def __init__(self):
method forward (line 65) | def forward(self, Q, K, V, attn_mask):
class PoswiseFeedForwardNet (line 81) | class PoswiseFeedForwardNet(nn.Module):
method __init__ (line 82) | def __init__(self):
method forward (line 88) | def forward(self, inputs):
class EncoderLayer (line 94) | class EncoderLayer(nn.Module):
method __init__ (line 95) | def __init__(self):
method forward (line 100) | def forward(self, enc_inputs, enc_self_attn_mask):
class DecoderLayer (line 105) | class DecoderLayer(nn.Module):
method __init__ (line 106) | def __init__(self):
method forward (line 112) | def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc...
class Encoder (line 118) | class Encoder(nn.Module):
method __init__ (line 119) | def __init__(self):
method forward (line 125) | def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]
class Decoder (line 134) | class Decoder(nn.Module):
method __init__ (line 135) | def __init__(self):
method forward (line 141) | def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs :...
class Transformer (line 156) | class Transformer(nn.Module):
method __init__ (line 157) | def __init__(self):
method forward (line 162) | def forward(self, enc_inputs, dec_inputs):
function showgraph (line 168) | def showgraph(attn):
FILE: 5-2.BERT/BERT.py
function make_batch (line 14) | def make_batch():
function get_attn_pad_mask (line 58) | def get_attn_pad_mask(seq_q, seq_k):
function gelu (line 65) | def gelu(x):
class Embedding (line 69) | class Embedding(nn.Module):
method __init__ (line 70) | def __init__(self):
method forward (line 77) | def forward(self, x, seg):
class ScaledDotProductAttention (line 84) | class ScaledDotProductAttention(nn.Module):
method __init__ (line 85) | def __init__(self):
method forward (line 88) | def forward(self, Q, K, V, attn_mask):
class MultiHeadAttention (line 95) | class MultiHeadAttention(nn.Module):
method __init__ (line 96) | def __init__(self):
method forward (line 101) | def forward(self, Q, K, V, attn_mask):
class PoswiseFeedForwardNet (line 117) | class PoswiseFeedForwardNet(nn.Module):
method __init__ (line 118) | def __init__(self):
method forward (line 123) | def forward(self, x):
class EncoderLayer (line 127) | class EncoderLayer(nn.Module):
method __init__ (line 128) | def __init__(self):
method forward (line 133) | def forward(self, enc_inputs, enc_self_attn_mask):
class BERT (line 138) | class BERT(nn.Module):
method __init__ (line 139) | def __init__(self):
method forward (line 156) | def forward(self, input_ids, segment_ids, masked_pos):
FILE: archive/tensorflow/v1/1-1.NNLM/NNLM.py
function make_batch (line 19) | def make_batch(sentences):
FILE: archive/tensorflow/v1/1-2.Word2Vec/Word2Vec-Skipgram(NCE_loss).py
function random_batch (line 28) | def random_batch(data, size):
FILE: archive/tensorflow/v1/1-2.Word2Vec/Word2Vec-Skipgram(Softmax).py
function random_batch (line 26) | def random_batch(data, size):
FILE: archive/tensorflow/v1/3-1.TextRNN/TextRNN.py
function make_batch (line 21) | def make_batch(sentences):
FILE: archive/tensorflow/v1/3-2.TextLSTM/TextLSTM.py
function make_batch (line 20) | def make_batch(seq_data):
FILE: archive/tensorflow/v1/3-3.Bi-LSTM/Bi-LSTM.py
function make_batch (line 21) | def make_batch(sentence):
FILE: archive/tensorflow/v1/4-1.Seq2Seq/Seq2Seq.py
function make_batch (line 23) | def make_batch(seq_data):
function translate (line 74) | def translate(word):
FILE: archive/tensorflow/v1/4-2.Seq2Seq(Attention)/Seq2Seq(Attention).py
function make_batch (line 22) | def make_batch(sentences):
function get_att_score (line 37) | def get_att_score(dec_output, enc_output): # enc_output [n_step, n_hidden]
function get_att_weight (line 42) | def get_att_weight(dec_output, enc_outputs):
Condensed preview — 42 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (270K chars).
[
{
"path": ".github/workflows/python-app.yml",
"chars": 963,
"preview": "# This workflow will install Python dependencies, run tests and lint with a single version of Python\n# For more informat"
},
{
"path": ".gitignore",
"chars": 6,
"preview": ".idea\n"
},
{
"path": "1-1.NNLM/NNLM.ipynb",
"chars": 4251,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"metadata\": {},\n \"source\": [\n \"# code by Tae Hwan Jung"
},
{
"path": "1-1.NNLM/NNLM.py",
"chars": 2588,
"preview": "# %%\n# code by Tae Hwan Jung @graykode\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\ndef make_batch():"
},
{
"path": "1-2.Word2Vec/Word2Vec-Skipgram(Softmax).ipynb",
"chars": 4607,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"metadata\": {},\n \"source\": [\n \"# code by Tae Hwan Jung"
},
{
"path": "1-2.Word2Vec/Word2Vec-Skipgram(Softmax).py",
"chars": 2883,
"preview": "# %%\n# code by Tae Hwan Jung @graykode\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim"
},
{
"path": "1-3.FastText/FastText.ipynb",
"chars": 57591,
"preview": "{\n \"nbformat\": 4,\n \"nbformat_minor\": 0,\n \"metadata\": {\n \"colab\": {\n \"name\": \"FastText.ipynb\",\n \"version\""
},
{
"path": "1-3.FastText/test.txt",
"chars": 15,
"preview": "sorry hate you"
},
{
"path": "1-3.FastText/train.txt",
"chars": 148,
"preview": "__label__1 i love you\n__label__1 he loves me\n__label__1 she likes baseball\n__label__0 i hate you\n__label__0 sorry for t"
},
{
"path": "2-1.TextCNN/TextCNN.ipynb",
"chars": 5266,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"metadata\": {},\n \"source\": [\n \"# code by Tae Hwan Jung"
},
{
"path": "2-1.TextCNN/TextCNN.py",
"chars": 3515,
"preview": "# %%\n# code by Tae Hwan Jung @graykode\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim"
},
{
"path": "3-1.TextRNN/TextRNN.ipynb",
"chars": 4642,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"metadata\": {},\n \"source\": [\n \"# code by Tae Hwan Jung"
},
{
"path": "3-1.TextRNN/TextRNN.py",
"chars": 2927,
"preview": "# %%\n# code by Tae Hwan Jung @graykode\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim"
},
{
"path": "3-2.TextLSTM/TextLSTM.ipynb",
"chars": 4126,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"metadata\": {},\n \"source\": [\n \"# code by Tae Hwan Jung"
},
{
"path": "3-2.TextLSTM/TextLSTM.py",
"chars": 2536,
"preview": "# %%\n# code by Tae Hwan Jung @graykode\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim"
},
{
"path": "3-3.Bi-LSTM/Bi-LSTM.ipynb",
"chars": 4301,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"metadata\": {},\n \"source\": [\n \"# code by Tae Hwan Jung"
},
{
"path": "3-3.Bi-LSTM/Bi-LSTM.py",
"chars": 2658,
"preview": "# %%\n# code by Tae Hwan Jung @graykode\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim"
},
{
"path": "4-1.Seq2Seq/Seq2Seq.ipynb",
"chars": 6920,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"metadata\": {},\n \"source\": [\n \"# code by Tae Hwan Jung"
},
{
"path": "4-1.Seq2Seq/Seq2Seq.py",
"chars": 4693,
"preview": "# %%\n# code by Tae Hwan Jung @graykode\nimport numpy as np\nimport torch\nimport torch.nn as nn\n\n# S: Symbol that shows sta"
},
{
"path": "4-2.Seq2Seq(Attention)/Seq2Seq(Attention).ipynb",
"chars": 7587,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"metadata\": {},\n \"source\": [\n \"# code by Tae Hwan Jung"
},
{
"path": "4-2.Seq2Seq(Attention)/Seq2Seq(Attention).py",
"chars": 5371,
"preview": "# %%\n# code by Tae Hwan Jung @graykode\n# Reference : https://github.com/hunkim/PyTorchZeroToAll/blob/master/14_2_seq2seq"
},
{
"path": "4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).ipynb",
"chars": 6127,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"metadata\": {},\n \"source\": [\n \"# code by Tae Hwan Jung"
},
{
"path": "4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).py",
"chars": 4272,
"preview": "# %%\n# code by Tae Hwan Jung(Jeff Jung) @graykode\n# Reference : https://github.com/prakashpandey9/Text-Classification-Py"
},
{
"path": "5-1.Transformer/Transformer(Greedy_decoder).ipynb",
"chars": 15850,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"metadata\": {},\n \"source\": [\n \"# code by Tae Hwan Jung"
},
{
"path": "5-1.Transformer/Transformer(Greedy_decoder).py",
"chars": 11964,
"preview": "# %%\n# code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612\n# Reference : https://github.com/jadore8011"
},
{
"path": "5-1.Transformer/Transformer.ipynb",
"chars": 14343,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"metadata\": {},\n \"source\": [\n \"# code by Tae Hwan Jung"
},
{
"path": "5-1.Transformer/Transformer.py",
"chars": 10764,
"preview": "# %%\n# code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612\n# Reference : https://github.com/jadore8011"
},
{
"path": "5-2.BERT/BERT.ipynb",
"chars": 14939,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"metadata\": {},\n \"source\": [\n \"# code by Tae Hwan Jung"
},
{
"path": "5-2.BERT/BERT.py",
"chars": 11189,
"preview": "# %%\n# code by Tae Hwan Jung(Jeff Jung) @graykode\n# Reference : https://github.com/jadore801120/attention-is-all-you-nee"
},
{
"path": "CONTRIBUTING.md",
"chars": 448,
"preview": "## Contribution Guidelines\n\nThank you to everyone who contributes. Here are some rules to follow before contributing.\n1."
},
{
"path": "LICENSE",
"chars": 1069,
"preview": "MIT License\n\nCopyright (c) 2019 TaeHwan Jung\n\nPermission is hereby granted, free of charge, to any person obtaining a co"
},
{
"path": "README.md",
"chars": 5129,
"preview": "## nlp-tutorial\n\n<p align=\"center\"><img width=\"100\" src=\"https://upload.wikimedia.org/wikipedia/commons/thumb/1/11/Tenso"
},
{
"path": "archive/tensorflow/v1/1-1.NNLM/NNLM.py",
"chars": 2209,
"preview": "# code by Tae Hwan Jung @graykode\nimport tensorflow as tf\nimport numpy as np\n\ntf.reset_default_graph()\n\nsentences = [ \"i"
},
{
"path": "archive/tensorflow/v1/1-2.Word2Vec/Word2Vec-Skipgram(NCE_loss).py",
"chars": 2849,
"preview": "'''\n code by Tae Hwan Jung(Jeff Jung) @graykode\n reference : https://github.com/golbin/TensorFlow-Tutorials/blob/maste"
},
{
"path": "archive/tensorflow/v1/1-2.Word2Vec/Word2Vec-Skipgram(Softmax).py",
"chars": 2663,
"preview": "'''\n code by Tae Hwan Jung(Jeff Jung) @graykode\n'''\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\nimport nump"
},
{
"path": "archive/tensorflow/v1/2-1.TextCNN/TextCNN.py",
"chars": 3640,
"preview": "'''\n code by Tae Hwan Jung(Jeff Jung) @graykode\n Reference : https://github.com/ioatr/textcnn\n'''\nimport tensorflow as"
},
{
"path": "archive/tensorflow/v1/3-1.TextRNN/TextRNN.py",
"chars": 2256,
"preview": "'''\n code by Tae Hwan Jung(Jeff Jung) @graykode\n'''\nimport tensorflow as tf\nimport numpy as np\n\ntf.reset_default_graph("
},
{
"path": "archive/tensorflow/v1/3-2.TextLSTM/TextLSTM.py",
"chars": 2186,
"preview": "'''\n code by Tae Hwan Jung(Jeff Jung) @graykode\n'''\nimport tensorflow as tf\nimport numpy as np\n\ntf.reset_default_graph("
},
{
"path": "archive/tensorflow/v1/3-3.Bi-LSTM/Bi-LSTM.py",
"chars": 2424,
"preview": "'''\n code by Tae Hwan Jung(Jeff Jung) @graykode\n'''\nimport tensorflow as tf\nimport numpy as np\n\ntf.reset_default_graph("
},
{
"path": "archive/tensorflow/v1/4-1.Seq2Seq/Seq2Seq.py",
"chars": 3573,
"preview": "'''\n code by Tae Hwan Jung(Jeff Jung) @graykode\n reference : https://github.com/golbin/TensorFlow-Tutorials/blob/maste"
},
{
"path": "archive/tensorflow/v1/4-2.Seq2Seq(Attention)/Seq2Seq(Attention).py",
"chars": 5184,
"preview": "# code by Tae Hwan Jung(Jeff Jung) @graykode\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\nimport numpy as np\n"
},
{
"path": "archive/tensorflow/v1/4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).py",
"chars": 3803,
"preview": "'''\n code by Tae Hwan Jung(Jeff Jung) @graykode\n Reference : https://github.com/prakashpandey9/Text-Classification-Pyt"
}
]
About this extraction
This page contains the full source code of the graykode/nlp-tutorial GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 42 files (248.5 KB), approximately 75.4k tokens, and a symbol index with 130 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.