Repository: graykode/nlp-tutorial Branch: master Commit: d05e31ec81d5 Files: 42 Total size: 248.5 KB Directory structure: gitextract_mxhf1sqm/ ├── .github/ │ └── workflows/ │ └── python-app.yml ├── .gitignore ├── 1-1.NNLM/ │ ├── NNLM.ipynb │ └── NNLM.py ├── 1-2.Word2Vec/ │ ├── Word2Vec-Skipgram(Softmax).ipynb │ └── Word2Vec-Skipgram(Softmax).py ├── 1-3.FastText/ │ ├── FastText.ipynb │ ├── test.txt │ └── train.txt ├── 2-1.TextCNN/ │ ├── TextCNN.ipynb │ └── TextCNN.py ├── 3-1.TextRNN/ │ ├── TextRNN.ipynb │ └── TextRNN.py ├── 3-2.TextLSTM/ │ ├── TextLSTM.ipynb │ └── TextLSTM.py ├── 3-3.Bi-LSTM/ │ ├── Bi-LSTM.ipynb │ └── Bi-LSTM.py ├── 4-1.Seq2Seq/ │ ├── Seq2Seq.ipynb │ └── Seq2Seq.py ├── 4-2.Seq2Seq(Attention)/ │ ├── Seq2Seq(Attention).ipynb │ └── Seq2Seq(Attention).py ├── 4-3.Bi-LSTM(Attention)/ │ ├── Bi-LSTM(Attention).ipynb │ └── Bi-LSTM(Attention).py ├── 5-1.Transformer/ │ ├── Transformer(Greedy_decoder).ipynb │ ├── Transformer(Greedy_decoder).py │ ├── Transformer.ipynb │ └── Transformer.py ├── 5-2.BERT/ │ ├── BERT.ipynb │ └── BERT.py ├── CONTRIBUTING.md ├── LICENSE ├── README.md └── archive/ └── tensorflow/ └── v1/ ├── 1-1.NNLM/ │ └── NNLM.py ├── 1-2.Word2Vec/ │ ├── Word2Vec-Skipgram(NCE_loss).py │ └── Word2Vec-Skipgram(Softmax).py ├── 2-1.TextCNN/ │ └── TextCNN.py ├── 3-1.TextRNN/ │ └── TextRNN.py ├── 3-2.TextLSTM/ │ └── TextLSTM.py ├── 3-3.Bi-LSTM/ │ └── Bi-LSTM.py ├── 4-1.Seq2Seq/ │ └── Seq2Seq.py ├── 4-2.Seq2Seq(Attention)/ │ └── Seq2Seq(Attention).py └── 4-3.Bi-LSTM(Attention)/ └── Bi-LSTM(Attention).py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/python-app.yml ================================================ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: 
https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: Python application on: push: branches: [ master ] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python 3.8 uses: actions/setup-python@v2 with: python-version: 3.8 - name: Install dependencies run: | python -m pip install --upgrade pip pip install py2ipynb==0.0.5 - name: Test with py2ipynb run: | py2ipynb '*/*.py' - name: Commit changes uses: EndBug/add-and-commit@v4 with: author_name: graykode author_email: nlkey2022@gmail.com message: "Automatic convert from py to ipynb" add: "*/*.ipynb" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .gitignore ================================================ .idea ================================================ FILE: 1-1.NNLM/NNLM.ipynb ================================================ { "cells": [ { "cell_type": "code", "metadata": {}, "source": [ "# code by Tae Hwan Jung @graykode\n", "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "\n", "def make_batch():\n", " input_batch = []\n", " target_batch = []\n", "\n", " for sen in sentences:\n", " word = sen.split() # space tokenizer\n", " input = [word_dict[n] for n in word[:-1]] # create (1~n-1) as input\n", " target = word_dict[word[-1]] # create (n) as target, We usually call this 'casual language model'\n", "\n", " input_batch.append(input)\n", " target_batch.append(target)\n", "\n", " return input_batch, target_batch\n", "\n", "# Model\n", "class NNLM(nn.Module):\n", " def __init__(self):\n", " super(NNLM, self).__init__()\n", " self.C = nn.Embedding(n_class, m)\n", " self.H = nn.Linear(n_step * m, n_hidden, bias=False)\n", " self.d = nn.Parameter(torch.ones(n_hidden))\n", " self.U = nn.Linear(n_hidden, n_class, bias=False)\n", " self.W = nn.Linear(n_step * m, n_class, bias=False)\n", " self.b = nn.Parameter(torch.ones(n_class))\n", "\n", " def 
forward(self, X):\n", " X = self.C(X) # X : [batch_size, n_step, m]\n", " X = X.view(-1, n_step * m) # [batch_size, n_step * m]\n", " tanh = torch.tanh(self.d + self.H(X)) # [batch_size, n_hidden]\n", " output = self.b + self.W(X) + self.U(tanh) # [batch_size, n_class]\n", " return output\n", "\n", "if __name__ == '__main__':\n", " n_step = 2 # number of steps, n-1 in paper\n", " n_hidden = 2 # number of hidden size, h in paper\n", " m = 2 # embedding size, m in paper\n", "\n", " sentences = [\"i like dog\", \"i love coffee\", \"i hate milk\"]\n", "\n", " word_list = \" \".join(sentences).split()\n", " word_list = list(set(word_list))\n", " word_dict = {w: i for i, w in enumerate(word_list)}\n", " number_dict = {i: w for i, w in enumerate(word_list)}\n", " n_class = len(word_dict) # number of Vocabulary\n", "\n", " model = NNLM()\n", "\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = optim.Adam(model.parameters(), lr=0.001)\n", "\n", " input_batch, target_batch = make_batch()\n", " input_batch = torch.LongTensor(input_batch)\n", " target_batch = torch.LongTensor(target_batch)\n", "\n", " # Training\n", " for epoch in range(5000):\n", " optimizer.zero_grad()\n", " output = model(input_batch)\n", "\n", " # output : [batch_size, n_class], target_batch : [batch_size]\n", " loss = criterion(output, target_batch)\n", " if (epoch + 1) % 1000 == 0:\n", " print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n", "\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # Predict\n", " predict = model(input_batch).data.max(1, keepdim=True)[1]\n", "\n", " # Test\n", " print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])" ], "outputs": [], "execution_count": null } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": 
# %%
# code by Tae Hwan Jung @graykode
"""Toy neural network language model (NNLM) that predicts the last word of a sentence.

The helper function and the model read their configuration (``sentences``,
``word_dict``, ``n_class``, ``n_step``, ``m``, ``n_hidden``) from module-level
names that the script section at the bottom defines before using them.
"""
import torch
import torch.nn as nn
import torch.optim as optim


def make_batch():
    """Build (context, next-word) training pairs from the module-level ``sentences``.

    Each sentence is split on whitespace. Every word except the last is mapped
    through the module-level ``word_dict`` to form the input ids; the last word
    is mapped to the target id.

    Returns:
        tuple: ``(input_batch, target_batch)`` where ``input_batch`` is a list
        of id lists (one per sentence) and ``target_batch`` is a list of ids.

    Raises:
        KeyError: if a word is missing from ``word_dict``.
    """
    input_batch = []
    target_batch = []

    for sen in sentences:
        words = sen.split()  # space tokenizer
        # Words 1..n-1 are the input and word n is the target; this setup is
        # usually called a 'causal language model'.
        context_ids = [word_dict[w] for w in words[:-1]]
        target_id = word_dict[words[-1]]

        input_batch.append(context_ids)
        target_batch.append(target_id)

    return input_batch, target_batch


# Model
class NNLM(nn.Module):
    """Feed-forward language model computing ``b + W x + U tanh(d + H x)``.

    ``x`` is the concatenation of the ``n_step`` context-word embeddings.
    Layer sizes are taken from the module-level ``n_class``, ``n_step``, ``m``
    and ``n_hidden`` when the model is constructed.
    """

    def __init__(self):
        super().__init__()
        self.C = nn.Embedding(n_class, m)  # word id -> m-dimensional vector
        self.H = nn.Linear(n_step * m, n_hidden, bias=False)
        self.d = nn.Parameter(torch.ones(n_hidden))  # added before the tanh
        self.U = nn.Linear(n_hidden, n_class, bias=False)
        self.W = nn.Linear(n_step * m, n_class, bias=False)  # direct input -> output path
        self.b = nn.Parameter(torch.ones(n_class))  # added to the output scores

    def forward(self, X):
        """Return unnormalised next-word scores.

        Args:
            X: LongTensor of word ids, [batch_size, n_step].

        Returns:
            Tensor of scores, [batch_size, n_class].
        """
        X = self.C(X)  # X : [batch_size, n_step, m]
        X = X.view(-1, n_step * m)  # [batch_size, n_step * m]
        hidden = torch.tanh(self.d + self.H(X))  # [batch_size, n_hidden]
        return self.b + self.W(X) + self.U(hidden)  # [batch_size, n_class]


if __name__ == '__main__':
    n_step = 2  # number of steps, n-1 in paper
    n_hidden = 2  # number of hidden size, h in paper
    m = 2  # embedding size, m in paper

    sentences = ["i like dog", "i love coffee", "i hate milk"]

    # sorted() keeps the word -> id mapping identical from run to run; the
    # iteration order of a set of strings can differ between interpreter runs.
    word_list = sorted(set(" ".join(sentences).split()))
    word_dict = {w: i for i, w in enumerate(word_list)}
    number_dict = dict(enumerate(word_list))
    n_class = len(word_dict)  # number of Vocabulary

    model = NNLM()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    input_batch, target_batch = make_batch()
    input_batch = torch.LongTensor(input_batch)
    target_batch = torch.LongTensor(target_batch)

    # Training
    for epoch in range(5000):
        optimizer.zero_grad()
        output = model(input_batch)

        # output : [batch_size, n_class], target_batch : [batch_size]
        loss = criterion(output, target_batch)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    # Predict
    # argmax over the class dimension yields a [batch_size] tensor directly, so
    # it stays iterable even for a single sentence; no_grad() because nothing
    # is back-propagated from here.
    with torch.no_grad():
        predict = model(input_batch).argmax(dim=1)

    # Test
    print([sen.split()[:-1] for sen in sentences], '->', [number_dict[n.item()] for n in predict])
mini-batch size\n", " embedding_size = 2 # embedding size\n", "\n", " sentences = [\"apple banana fruit\", \"banana orange fruit\", \"orange banana fruit\",\n", " \"dog cat animal\", \"cat monkey animal\", \"monkey dog animal\"]\n", "\n", " word_sequence = \" \".join(sentences).split()\n", " word_list = \" \".join(sentences).split()\n", " word_list = list(set(word_list))\n", " word_dict = {w: i for i, w in enumerate(word_list)}\n", " voc_size = len(word_list)\n", "\n", " # Make skip gram of one size window\n", " skip_grams = []\n", " for i in range(1, len(word_sequence) - 1):\n", " target = word_dict[word_sequence[i]]\n", " context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]\n", " for w in context:\n", " skip_grams.append([target, w])\n", "\n", " model = Word2Vec()\n", "\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = optim.Adam(model.parameters(), lr=0.001)\n", "\n", " # Training\n", " for epoch in range(5000):\n", " input_batch, target_batch = random_batch()\n", " input_batch = torch.Tensor(input_batch)\n", " target_batch = torch.LongTensor(target_batch)\n", "\n", " optimizer.zero_grad()\n", " output = model(input_batch)\n", "\n", " # output : [batch_size, voc_size], target_batch : [batch_size] (LongTensor, not one-hot)\n", " loss = criterion(output, target_batch)\n", " if (epoch + 1) % 1000 == 0:\n", " print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n", "\n", " loss.backward()\n", " optimizer.step()\n", "\n", " for i, label in enumerate(word_list):\n", " W, WT = model.parameters()\n", " x, y = W[0][i].item(), W[1][i].item()\n", " plt.scatter(x, y)\n", " plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')\n", " plt.show()\n" ], "outputs": [], "execution_count": null } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 
# %%
# code by Tae Hwan Jung @graykode
"""Toy skip-gram Word2Vec trained with a full softmax over the vocabulary.

The helper function and the model read their configuration (``skip_grams``,
``voc_size``, ``batch_size``, ``embedding_size``) from module-level names that
the script section at the bottom defines before using them.
"""
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


def random_batch():
    """Sample ``batch_size`` distinct pairs from the module-level ``skip_grams``.

    Returns:
        tuple: ``(random_inputs, random_labels)``. ``random_inputs`` is a list
        of one-hot numpy rows of length ``voc_size`` encoding each pair's
        target word; ``random_labels`` is the list of matching context-word ids.

    Raises:
        ValueError: raised by ``np.random.choice`` when ``batch_size`` exceeds
            the number of available pairs (sampling is without replacement).
    """
    random_index = np.random.choice(len(skip_grams), batch_size, replace=False)

    targets = [skip_grams[i][0] for i in random_index]  # target
    random_labels = [skip_grams[i][1] for i in random_index]  # context word

    # Build the identity matrix once and pick all one-hot rows in a single
    # indexing step instead of rebuilding np.eye() for every sampled pair.
    random_inputs = list(np.eye(voc_size)[targets])

    return random_inputs, random_labels


# Model
class Word2Vec(nn.Module):
    """Two bias-free linear layers: one-hot word -> embedding -> vocabulary scores.

    Layer sizes are taken from the module-level ``voc_size`` and
    ``embedding_size`` when the model is constructed.
    """

    def __init__(self):
        super().__init__()
        # W and WT are independent weights; they are not transposes of each other
        self.W = nn.Linear(voc_size, embedding_size, bias=False)  # voc_size > embedding_size Weight
        self.WT = nn.Linear(embedding_size, voc_size, bias=False)  # embedding_size > voc_size Weight

    def forward(self, X):
        """Return unnormalised context-word scores.

        Args:
            X: float tensor of one-hot rows, [batch_size, voc_size].

        Returns:
            Tensor of scores, [batch_size, voc_size].
        """
        hidden_layer = self.W(X)  # hidden_layer : [batch_size, embedding_size]
        return self.WT(hidden_layer)  # output_layer : [batch_size, voc_size]


if __name__ == '__main__':
    # Plotting is only used by this script section; importing it here keeps
    # the helper function and the model importable without matplotlib.
    import matplotlib.pyplot as plt

    batch_size = 2  # mini-batch size
    embedding_size = 2  # embedding size

    sentences = ["apple banana fruit", "banana orange fruit", "orange banana fruit",
                 "dog cat animal", "cat monkey animal", "monkey dog animal"]

    word_sequence = " ".join(sentences).split()
    # sorted() keeps the word -> id mapping identical from run to run; the
    # iteration order of a set of strings can differ between interpreter runs.
    word_list = sorted(set(word_sequence))
    word_dict = {w: i for i, w in enumerate(word_list)}
    voc_size = len(word_list)

    # Make skip gram of one size window
    # NOTE: the window slides over the concatenated corpus, so pairs are also
    # formed across sentence boundaries.
    skip_grams = []
    for i in range(1, len(word_sequence) - 1):
        target = word_dict[word_sequence[i]]
        for neighbour in (word_sequence[i - 1], word_sequence[i + 1]):
            skip_grams.append([target, word_dict[neighbour]])

    model = Word2Vec()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training
    for epoch in range(5000):
        input_batch, target_batch = random_batch()
        # Stack the rows into one ndarray first: building a tensor straight
        # from a list of ndarrays takes a slow element-by-element path.
        input_batch = torch.tensor(np.array(input_batch), dtype=torch.float32)
        target_batch = torch.LongTensor(target_batch)

        optimizer.zero_grad()
        output = model(input_batch)

        # output : [batch_size, voc_size], target_batch : [batch_size] (LongTensor, not one-hot)
        loss = criterion(output, target_batch)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    # The first layer's weight is [embedding_size, voc_size]; column i holds
    # the two coordinates plotted for word i. Fetched once, outside the loop.
    W = model.W.weight
    for i, label in enumerate(word_list):
        x, y = W[0][i].item(), W[1][i].item()
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')
    plt.show()
140.82.118.3, 140.82.118.4\n", "Connecting to github.com (github.com)|140.82.118.3|:443... connected.\n", "HTTP request sent, awaiting response... 302 Found\n", "Location: https://codeload.github.com/facebookresearch/fastText/zip/0.2.0 [following]\n", "--2019-02-02 14:43:56-- https://codeload.github.com/facebookresearch/fastText/zip/0.2.0\n", "Resolving codeload.github.com (codeload.github.com)... 192.30.253.121, 192.30.253.120\n", "Connecting to codeload.github.com (codeload.github.com)|192.30.253.121|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [application/zip]\n", "Saving to: ‘0.2.0.zip’\n", "\n", "0.2.0.zip [ <=> ] 4.10M 6.17MB/s in 0.7s \n", "\n", "2019-02-02 14:43:57 (6.17 MB/s) - ‘0.2.0.zip’ saved [4304799]\n", "\n", "Archive: 0.2.0.zip\n", "7842495a4d64c7a3bb4339d45d6e64321d002ed8\n", " creating: fastText-0.2.0/\n", " creating: fastText-0.2.0/.circleci/\n", " inflating: fastText-0.2.0/.circleci/cmake_test.sh \n", " inflating: fastText-0.2.0/.circleci/config.yml \n", " inflating: fastText-0.2.0/.circleci/gcc_test.sh \n", " inflating: fastText-0.2.0/.circleci/pip_test.sh \n", " inflating: fastText-0.2.0/.circleci/pull_data.sh \n", " inflating: fastText-0.2.0/.circleci/python_test.sh \n", " inflating: fastText-0.2.0/.circleci/run_locally.sh \n", " inflating: fastText-0.2.0/.circleci/setup_circleimg.sh \n", " inflating: fastText-0.2.0/.circleci/setup_debian.sh \n", " inflating: fastText-0.2.0/.gitignore \n", " inflating: fastText-0.2.0/CMakeLists.txt \n", " inflating: fastText-0.2.0/CONTRIBUTING.md \n", " inflating: fastText-0.2.0/LICENSE \n", " inflating: fastText-0.2.0/MANIFEST.in \n", " inflating: fastText-0.2.0/Makefile \n", " inflating: fastText-0.2.0/README.md \n", " creating: fastText-0.2.0/alignment/\n", " inflating: fastText-0.2.0/alignment/README.md \n", " inflating: fastText-0.2.0/alignment/align.py \n", " inflating: fastText-0.2.0/alignment/eval.py \n", " inflating: 
fastText-0.2.0/alignment/example.sh \n", " inflating: fastText-0.2.0/alignment/utils.py \n", " inflating: fastText-0.2.0/classification-example.sh \n", " inflating: fastText-0.2.0/classification-results.sh \n", " creating: fastText-0.2.0/docs/\n", " inflating: fastText-0.2.0/docs/aligned-vectors.md \n", " inflating: fastText-0.2.0/docs/api.md \n", " inflating: fastText-0.2.0/docs/cheatsheet.md \n", " inflating: fastText-0.2.0/docs/crawl-vectors.md \n", " inflating: fastText-0.2.0/docs/dataset.md \n", " inflating: fastText-0.2.0/docs/english-vectors.md \n", " inflating: fastText-0.2.0/docs/faqs.md \n", " inflating: fastText-0.2.0/docs/language-identification.md \n", " inflating: fastText-0.2.0/docs/options.md \n", " inflating: fastText-0.2.0/docs/pretrained-vectors.md \n", " inflating: fastText-0.2.0/docs/references.md \n", " inflating: fastText-0.2.0/docs/supervised-models.md \n", " inflating: fastText-0.2.0/docs/supervised-tutorial.md \n", " inflating: fastText-0.2.0/docs/support.md \n", " inflating: fastText-0.2.0/docs/unsupervised-tutorials.md \n", " inflating: fastText-0.2.0/eval.py \n", " inflating: fastText-0.2.0/get-wikimedia.sh \n", " inflating: fastText-0.2.0/pretrained-vectors.md \n", " creating: fastText-0.2.0/python/\n", " inflating: fastText-0.2.0/python/README.md \n", " inflating: fastText-0.2.0/python/README.rst \n", " creating: fastText-0.2.0/python/benchmarks/\n", " inflating: fastText-0.2.0/python/benchmarks/README.rst \n", " inflating: fastText-0.2.0/python/benchmarks/get_word_vector.py \n", " creating: fastText-0.2.0/python/doc/\n", " creating: fastText-0.2.0/python/doc/examples/\n", " inflating: fastText-0.2.0/python/doc/examples/FastTextEmbeddingBag.py \n", " inflating: fastText-0.2.0/python/doc/examples/bin_to_vec.py \n", " inflating: fastText-0.2.0/python/doc/examples/compute_accuracy.py \n", " inflating: fastText-0.2.0/python/doc/examples/get_vocab.py \n", " inflating: fastText-0.2.0/python/doc/examples/train_supervised.py \n", " inflating: 
fastText-0.2.0/python/doc/examples/train_unsupervised.py \n", " creating: fastText-0.2.0/python/fastText/\n", " inflating: fastText-0.2.0/python/fastText/FastText.py \n", " inflating: fastText-0.2.0/python/fastText/__init__.py \n", " creating: fastText-0.2.0/python/fastText/pybind/\n", " inflating: fastText-0.2.0/python/fastText/pybind/fasttext_pybind.cc \n", " creating: fastText-0.2.0/python/fastText/tests/\n", " inflating: fastText-0.2.0/python/fastText/tests/__init__.py \n", " inflating: fastText-0.2.0/python/fastText/tests/test_configurations.py \n", " inflating: fastText-0.2.0/python/fastText/tests/test_script.py \n", " creating: fastText-0.2.0/python/fastText/util/\n", " inflating: fastText-0.2.0/python/fastText/util/__init__.py \n", " inflating: fastText-0.2.0/python/fastText/util/util.py \n", " inflating: fastText-0.2.0/quantization-example.sh \n", " inflating: fastText-0.2.0/runtests.py \n", " creating: fastText-0.2.0/scripts/\n", " creating: fastText-0.2.0/scripts/kbcompletion/\n", " inflating: fastText-0.2.0/scripts/kbcompletion/README.md \n", " inflating: fastText-0.2.0/scripts/kbcompletion/data.sh \n", " inflating: fastText-0.2.0/scripts/kbcompletion/eval.cpp \n", " inflating: fastText-0.2.0/scripts/kbcompletion/fb15k.sh \n", " inflating: fastText-0.2.0/scripts/kbcompletion/fb15k237.sh \n", " inflating: fastText-0.2.0/scripts/kbcompletion/svo.sh \n", " inflating: fastText-0.2.0/scripts/kbcompletion/wn18.sh \n", " creating: fastText-0.2.0/scripts/quantization/\n", " inflating: fastText-0.2.0/scripts/quantization/quantization-results.sh \n", " extracting: fastText-0.2.0/setup.cfg \n", " inflating: fastText-0.2.0/setup.py \n", " creating: fastText-0.2.0/src/\n", " inflating: fastText-0.2.0/src/args.cc \n", " inflating: fastText-0.2.0/src/args.h \n", " inflating: fastText-0.2.0/src/dictionary.cc \n", " inflating: fastText-0.2.0/src/dictionary.h \n", " inflating: fastText-0.2.0/src/fasttext.cc \n", " inflating: fastText-0.2.0/src/fasttext.h \n", " 
inflating: fastText-0.2.0/src/main.cc \n", " inflating: fastText-0.2.0/src/matrix.cc \n", " inflating: fastText-0.2.0/src/matrix.h \n", " inflating: fastText-0.2.0/src/meter.cc \n", " inflating: fastText-0.2.0/src/meter.h \n", " inflating: fastText-0.2.0/src/model.cc \n", " inflating: fastText-0.2.0/src/model.h \n", " inflating: fastText-0.2.0/src/productquantizer.cc \n", " inflating: fastText-0.2.0/src/productquantizer.h \n", " inflating: fastText-0.2.0/src/qmatrix.cc \n", " inflating: fastText-0.2.0/src/qmatrix.h \n", " inflating: fastText-0.2.0/src/real.h \n", " inflating: fastText-0.2.0/src/utils.cc \n", " inflating: fastText-0.2.0/src/utils.h \n", " inflating: fastText-0.2.0/src/vector.cc \n", " inflating: fastText-0.2.0/src/vector.h \n", " creating: fastText-0.2.0/tests/\n", " inflating: fastText-0.2.0/tests/fetch_test_data.sh \n", " creating: fastText-0.2.0/website/\n", " inflating: fastText-0.2.0/website/README.md \n", " creating: fastText-0.2.0/website/blog/\n", " inflating: fastText-0.2.0/website/blog/2016-08-18-blog-post.md \n", " inflating: fastText-0.2.0/website/blog/2017-05-02-blog-post.md \n", " inflating: fastText-0.2.0/website/blog/2017-10-02-blog-post.md \n", " creating: fastText-0.2.0/website/core/\n", " inflating: fastText-0.2.0/website/core/Footer.js \n", " inflating: fastText-0.2.0/website/package.json \n", " creating: fastText-0.2.0/website/pages/\n", " creating: fastText-0.2.0/website/pages/en/\n", " inflating: fastText-0.2.0/website/pages/en/index.js \n", " inflating: fastText-0.2.0/website/sidebars.json \n", " inflating: fastText-0.2.0/website/siteConfig.js \n", " creating: fastText-0.2.0/website/static/\n", " creating: fastText-0.2.0/website/static/docs/\n", " creating: fastText-0.2.0/website/static/docs/en/\n", " creating: fastText-0.2.0/website/static/docs/en/html/\n", " extracting: fastText-0.2.0/website/static/docs/en/html/.classfasttext_1_1QMatrix-members.html.i4eKqy \n", " inflating: 
fastText-0.2.0/website/static/docs/en/html/annotated.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/annotated_dup.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/args_8cc.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/args_8h.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/args_8h.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/args_8h_source.html \n", " extracting: fastText-0.2.0/website/static/docs/en/html/bc_s.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/bdwn.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classes.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Args-members.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Args.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Args.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Dictionary.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Dictionary.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1FastText-members.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1FastText.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1FastText.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Matrix-members.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Matrix.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Matrix.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Model-members.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Model.html \n", " inflating: 
fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Model.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1QMatrix.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1QMatrix.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Vector-members.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Vector.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/classfasttext_1_1Vector.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/closed.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/dictionary_8cc.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/dictionary_8h.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/dictionary_8h.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/dictionary_8h_source.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js \n", " extracting: fastText-0.2.0/website/static/docs/en/html/doc.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/doxygen.css \n", " extracting: fastText-0.2.0/website/static/docs/en/html/doxygen.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/dynsections.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/fasttext_8cc.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/fasttext_8h.html \n", " inflating: 
fastText-0.2.0/website/static/docs/en/html/fasttext_8h.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/fasttext_8h_source.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/favicon.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/files.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/files.js \n", " extracting: fastText-0.2.0/website/static/docs/en/html/folderclosed.png \n", " extracting: fastText-0.2.0/website/static/docs/en/html/folderopen.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_0x7e.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_b.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_c.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_d.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_dup.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_e.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_f.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_func.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_g.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_h.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_i.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_k.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_l.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_m.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_n.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_o.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_p.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_q.html 
\n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_r.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_s.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_t.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_u.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_v.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_vars.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_w.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/functions_z.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/globals.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/globals_defs.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/globals_func.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/index.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/jquery.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/main_8cc.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/main_8cc.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/matrix_8cc.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/matrix_8h.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/matrix_8h_source.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/menu.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/menudata.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/model_8cc.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/model_8h.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/model_8h.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/model_8h_source.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/namespacefasttext.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/namespacefasttext.js 
\n", " inflating: fastText-0.2.0/website/static/docs/en/html/namespacefasttext_1_1utils.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/namespacemembers.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/namespacemembers_enum.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/namespacemembers_func.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/namespacemembers_type.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/namespaces.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/namespaces.js \n", " extracting: fastText-0.2.0/website/static/docs/en/html/nav_f.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/nav_g.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/nav_h.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/navtree.css \n", " inflating: fastText-0.2.0/website/static/docs/en/html/navtree.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/navtreedata.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/navtreeindex0.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/navtreeindex1.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/open.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/productquantizer_8cc.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/productquantizer_8cc.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/productquantizer_8h.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/productquantizer_8h_source.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/qmatrix_8cc.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/qmatrix_8h.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/qmatrix_8h_source.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/real_8h.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/real_8h.js \n", " 
inflating: fastText-0.2.0/website/static/docs/en/html/real_8h_source.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/resize.js \n", " creating: fastText-0.2.0/website/static/docs/en/html/search/\n", " extracting: fastText-0.2.0/website/static/docs/en/html/search/.files_7.html.StRRNc \n", " extracting: fastText-0.2.0/website/static/docs/en/html/search/.variables_a.html.1MGQ27 \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_0.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_0.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_1.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_1.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_10.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_10.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_11.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_11.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_12.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_12.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_13.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_13.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_14.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_14.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_15.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_15.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_16.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_16.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_17.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_17.js \n", " inflating: 
fastText-0.2.0/website/static/docs/en/html/search/all_2.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_2.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_3.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_3.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_4.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_4.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_5.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_5.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_6.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_6.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_7.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_7.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_8.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_8.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_9.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_9.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_a.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_a.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_b.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_b.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_c.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_c.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_d.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_d.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_e.html \n", " inflating: 
fastText-0.2.0/website/static/docs/en/html/search/all_e.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_f.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/all_f.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_0.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_0.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_1.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_1.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_2.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_2.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_3.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_3.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_4.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_4.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_5.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_5.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_6.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_6.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_7.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_7.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_8.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/classes_8.js \n", " extracting: fastText-0.2.0/website/static/docs/en/html/search/close.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_0.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_0.js \n", " inflating: 
fastText-0.2.0/website/static/docs/en/html/search/defines_1.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_1.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_2.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_2.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_3.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/defines_3.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_0.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_0.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_1.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_1.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_2.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enums_2.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_0.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_0.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_1.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_1.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_2.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_2.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_3.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_3.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_4.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_4.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_5.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/enumvalues_5.js \n", " inflating: 
fastText-0.2.0/website/static/docs/en/html/search/files_0.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_0.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_1.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_1.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_2.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_2.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_3.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_3.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_4.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_4.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_5.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_5.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_6.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_6.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_7.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_7.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_8.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/files_8.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_0.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_0.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_1.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_1.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_10.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_10.js \n", " inflating: 
fastText-0.2.0/website/static/docs/en/html/search/functions_11.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_11.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_12.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_12.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_13.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_13.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_14.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_14.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_15.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_15.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_16.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_16.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_17.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_17.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_2.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_2.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_3.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_3.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_4.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_4.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_5.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_5.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_6.html \n", " inflating: 
fastText-0.2.0/website/static/docs/en/html/search/functions_6.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_7.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_7.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_8.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_8.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_9.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_9.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_a.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_a.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_b.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_b.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_c.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_c.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_d.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_d.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_e.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_e.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_f.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/functions_f.js \n", " extracting: fastText-0.2.0/website/static/docs/en/html/search/mag_sel.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/namespaces_0.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/namespaces_0.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/nomatches.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/search.css \n", " 
inflating: fastText-0.2.0/website/static/docs/en/html/search/search.js \n", " extracting: fastText-0.2.0/website/static/docs/en/html/search/search_l.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/search_m.png \n", " extracting: fastText-0.2.0/website/static/docs/en/html/search/search_r.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/searchdata.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/typedefs_0.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/typedefs_0.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/typedefs_1.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/typedefs_1.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_0.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_0.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_1.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_1.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_10.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_10.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_11.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_11.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_12.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_12.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_13.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_13.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_2.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_2.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_3.html 
\n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_3.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_4.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_4.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_5.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_5.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_6.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_6.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_7.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_7.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_8.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_8.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_9.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_9.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_a.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_a.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_b.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_b.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_c.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_c.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_d.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_d.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_e.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_e.js \n", " inflating: 
fastText-0.2.0/website/static/docs/en/html/search/variables_f.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/search/variables_f.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/splitbar.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1Node-members.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1Node.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1Node.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1entry-members.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1entry.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/structfasttext_1_1entry.js \n", " extracting: fastText-0.2.0/website/static/docs/en/html/sync_off.png \n", " extracting: fastText-0.2.0/website/static/docs/en/html/sync_on.png \n", " extracting: fastText-0.2.0/website/static/docs/en/html/tab_a.png \n", " extracting: fastText-0.2.0/website/static/docs/en/html/tab_b.png \n", " extracting: fastText-0.2.0/website/static/docs/en/html/tab_h.png \n", " extracting: fastText-0.2.0/website/static/docs/en/html/tab_s.png \n", " inflating: fastText-0.2.0/website/static/docs/en/html/tabs.css \n", " inflating: fastText-0.2.0/website/static/docs/en/html/utils_8cc.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/utils_8cc.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/utils_8h.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/utils_8h.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/utils_8h_source.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/vector_8cc.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/vector_8cc.js \n", " inflating: fastText-0.2.0/website/static/docs/en/html/vector_8h.html \n", " inflating: fastText-0.2.0/website/static/docs/en/html/vector_8h.js \n", " inflating: 
fastText-0.2.0/website/static/docs/en/html/vector_8h_source.html \n", " inflating: fastText-0.2.0/website/static/fasttext.css \n", " creating: fastText-0.2.0/website/static/img/\n", " creating: fastText-0.2.0/website/static/img/authors/\n", " inflating: fastText-0.2.0/website/static/img/authors/armand_joulin.jpg \n", " inflating: fastText-0.2.0/website/static/img/authors/christian_puhrsch.png \n", " inflating: fastText-0.2.0/website/static/img/authors/edouard_grave.jpeg \n", " inflating: fastText-0.2.0/website/static/img/authors/piotr_bojanowski.jpg \n", " inflating: fastText-0.2.0/website/static/img/authors/tomas_mikolov.jpg \n", " creating: fastText-0.2.0/website/static/img/blog/\n", " inflating: fastText-0.2.0/website/static/img/blog/2016-08-18-blog-post-img1.png \n", " inflating: fastText-0.2.0/website/static/img/blog/2016-08-18-blog-post-img2.png \n", " inflating: fastText-0.2.0/website/static/img/blog/2017-05-02-blog-post-img1.jpg \n", " inflating: fastText-0.2.0/website/static/img/blog/2017-05-02-blog-post-img2.jpg \n", " inflating: fastText-0.2.0/website/static/img/blog/2017-10-02-blog-post-img1.png \n", " inflating: fastText-0.2.0/website/static/img/cbo_vs_skipgram.png \n", " inflating: fastText-0.2.0/website/static/img/fasttext-icon-api.png \n", " inflating: fastText-0.2.0/website/static/img/fasttext-icon-bg-web.png \n", " inflating: fastText-0.2.0/website/static/img/fasttext-icon-color-square.png \n", " inflating: fastText-0.2.0/website/static/img/fasttext-icon-color-web.png \n", " inflating: fastText-0.2.0/website/static/img/fasttext-icon-faq.png \n", " inflating: fastText-0.2.0/website/static/img/fasttext-icon-tutorial.png \n", " inflating: fastText-0.2.0/website/static/img/fasttext-icon-white-web.png \n", " inflating: fastText-0.2.0/website/static/img/fasttext-logo-color-web.png \n", " inflating: fastText-0.2.0/website/static/img/fasttext-logo-white-web.png \n", " inflating: fastText-0.2.0/website/static/img/logo-color.png \n", " inflating: 
fastText-0.2.0/website/static/img/model-black.png \n", " inflating: fastText-0.2.0/website/static/img/model-blue.png \n", " inflating: fastText-0.2.0/website/static/img/model-red.png \n", " inflating: fastText-0.2.0/website/static/img/ogimage.png \n", " inflating: fastText-0.2.0/website/static/img/oss_logo.png \n", " inflating: fastText-0.2.0/wikifil.pl \n", " inflating: fastText-0.2.0/word-vector-example.sh \n", "/content/fastText-0.2.0\n", "c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/args.cc\n", "c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/dictionary.cc\n", "c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/productquantizer.cc\n", "c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/matrix.cc\n", "c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/qmatrix.cc\n", "c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/vector.cc\n", "c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/model.cc\n", "c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/utils.cc\n", "c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/meter.cc\n", "c++ -pthread -std=c++0x -march=native -O3 -funroll-loops -c src/fasttext.cc\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:\u001b[m\u001b[K In member function ‘\u001b[01m\u001b[Kvoid fasttext::FastText::quantize(const fasttext::Args&)\u001b[m\u001b[K’:\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:302:45:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[Kstd::vector fasttext::FastText::selectEmbeddings(int32_t) const\u001b[m\u001b[K’ is deprecated: selectEmbeddings is being deprecated. 
[\u001b[01;35m\u001b[K-Wdeprecated-declarations\u001b[m\u001b[K]\n", " auto idx = selectEmbeddings(qargs.cutoff\u001b[01;35m\u001b[K)\u001b[m\u001b[K;\n", " \u001b[01;35m\u001b[K^\u001b[m\u001b[K\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:279:22:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n", " std::vector \u001b[01;36m\u001b[KFastText\u001b[m\u001b[K::selectEmbeddings(int32_t cutoff) const {\n", " \u001b[01;36m\u001b[K^~~~~~~~\u001b[m\u001b[K\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:\u001b[m\u001b[K In member function ‘\u001b[01m\u001b[Kvoid fasttext::FastText::lazyComputeWordVectors()\u001b[m\u001b[K’:\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:531:40:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[Kvoid fasttext::FastText::precomputeWordVectors(fasttext::Matrix&)\u001b[m\u001b[K’ is deprecated: precomputeWordVectors is being deprecated. [\u001b[01;35m\u001b[K-Wdeprecated-declarations\u001b[m\u001b[K]\n", " precomputeWordVectors(*wordVectors_\u001b[01;35m\u001b[K)\u001b[m\u001b[K;\n", " \u001b[01;35m\u001b[K^\u001b[m\u001b[K\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:514:6:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n", " void \u001b[01;36m\u001b[KFastText\u001b[m\u001b[K::precomputeWordVectors(Matrix& wordVectors) {\n", " \u001b[01;36m\u001b[K^~~~~~~~\u001b[m\u001b[K\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:\u001b[m\u001b[K In member function ‘\u001b[01m\u001b[Kvoid fasttext::FastText::trainThread(int32_t)\u001b[m\u001b[K’:\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:650:41:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[Kvoid fasttext::FastText::supervised(fasttext::Model&, fasttext::real, const std::vector&, const std::vector&)\u001b[m\u001b[K’ is deprecated: supervised is being deprecated. 
[\u001b[01;35m\u001b[K-Wdeprecated-declarations\u001b[m\u001b[K]\n", " supervised(model, lr, line, labels\u001b[01;35m\u001b[K)\u001b[m\u001b[K;\n", " \u001b[01;35m\u001b[K^\u001b[m\u001b[K\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:338:6:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n", " void \u001b[01;36m\u001b[KFastText\u001b[m\u001b[K::supervised(\n", " \u001b[01;36m\u001b[K^~~~~~~~\u001b[m\u001b[K\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:653:27:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[Kvoid fasttext::FastText::cbow(fasttext::Model&, fasttext::real, const std::vector&)\u001b[m\u001b[K’ is deprecated: cbow is being deprecated. [\u001b[01;35m\u001b[K-Wdeprecated-declarations\u001b[m\u001b[K]\n", " cbow(model, lr, line\u001b[01;35m\u001b[K)\u001b[m\u001b[K;\n", " \u001b[01;35m\u001b[K^\u001b[m\u001b[K\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:355:6:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n", " void \u001b[01;36m\u001b[KFastText\u001b[m\u001b[K::cbow(Model& model, real lr, const std::vector& line) {\n", " \u001b[01;36m\u001b[K^~~~~~~~\u001b[m\u001b[K\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:656:31:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[Kvoid fasttext::FastText::skipgram(fasttext::Model&, fasttext::real, const std::vector&)\u001b[m\u001b[K’ is deprecated: skipgram is being deprecated. 
[\u001b[01;35m\u001b[K-Wdeprecated-declarations\u001b[m\u001b[K]\n", " skipgram(model, lr, line\u001b[01;35m\u001b[K)\u001b[m\u001b[K;\n", " \u001b[01;35m\u001b[K^\u001b[m\u001b[K\n", "\u001b[01m\u001b[Ksrc/fasttext.cc:371:6:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n", " void \u001b[01;36m\u001b[KFastText\u001b[m\u001b[K::skipgram(\n", " \u001b[01;36m\u001b[K^~~~~~~~\u001b[m\u001b[K\n", "c++ -pthread -std=c++0x -march=native -O3 -funroll-loops args.o dictionary.o productquantizer.o matrix.o qmatrix.o vector.o model.o utils.o meter.o fasttext.o src/main.cc -o fasttext\n" ], "name": "stdout" } ] }, { "metadata": { "id": "5JauDviyqqL-", "colab_type": "text" }, "cell_type": "markdown", "source": [ "## Make simple dataset" ] }, { "metadata": { "id": "ALMQ3gjFqqZS", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "# 1 is positive, 0 is negative\n", "f = open('train.txt', 'w')\n", "f.write('__label__1 i love you\\n')\n", "f.write('__label__1 he loves me\\n')\n", "f.write('__label__1 she likes baseball\\n')\n", "f.write('__label__0 i hate you\\n')\n", "f.write('__label__0 sorry for that\\n')\n", "f.write('__label__0 this is awful')\n", "f.close()\n", "\n", "f = open('test.txt', 'w')\n", "f.write('sorry hate you')\n", "f.close()" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "i3_PpexwsN_a", "colab_type": "text" }, "cell_type": "markdown", "source": [ "## Training" ] }, { "metadata": { "id": "q06m76JusOQ8", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 92 }, "outputId": "4ed3502d-4aec-4d06-cb02-b8392978ce14" }, "cell_type": "code", "source": [ "!./fasttext supervised -input train.txt -output model -dim 2" ], "execution_count": 18, "outputs": [ { "output_type": "stream", "text": [ "\rRead 0M words\n", "Number of words: 17\n", "Number of labels: 2\n", "\rProgress: 100.0% words/sec/thread: 17608 lr: 0.000000 loss: 0.672308 ETA: 0h 0m\n" ], "name": "stdout" } ] }, { 
"metadata": { "id": "C77MXO-GsOpi", "colab_type": "text" }, "cell_type": "markdown", "source": [ "## Predict" ] }, { "metadata": { "id": "y1yDPCjVsO6x", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 36 }, "outputId": "8963d7bd-01c8-40b9-e1ee-1446cb1b3454" }, "cell_type": "code", "source": [ "!cat test.txt\n", "!./fasttext predict model.bin test.txt" ], "execution_count": 22, "outputs": [ { "output_type": "stream", "text": [ "sorry hate you__label__0\n" ], "name": "stdout" } ] } ] } ================================================ FILE: 1-3.FastText/test.txt ================================================ sorry hate you ================================================ FILE: 1-3.FastText/train.txt ================================================ __label__1 i love you __label__1 he loves me __label__1 she likes baseball __label__0 i hate you __label__0 sorry for that __label__0 this is awful ================================================ FILE: 2-1.TextCNN/TextCNN.ipynb ================================================ { "cells": [ { "cell_type": "code", "metadata": {}, "source": [ "# code by Tae Hwan Jung @graykode\n", "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "import torch.nn.functional as F\n", "\n", "class TextCNN(nn.Module):\n", " def __init__(self):\n", " super(TextCNN, self).__init__()\n", " self.num_filters_total = num_filters * len(filter_sizes)\n", " self.W = nn.Embedding(vocab_size, embedding_size)\n", " self.Weight = nn.Linear(self.num_filters_total, num_classes, bias=False)\n", " self.Bias = nn.Parameter(torch.ones([num_classes]))\n", " self.filter_list = nn.ModuleList([nn.Conv2d(1, num_filters, (size, embedding_size)) for size in filter_sizes])\n", "\n", " def forward(self, X):\n", " embedded_chars = self.W(X) # [batch_size, sequence_length, sequence_length]\n", " embedded_chars = embedded_chars.unsqueeze(1) # add channel(=1) [batch, channel(=1), 
sequence_length, embedding_size]\n", "\n", " pooled_outputs = []\n", " for i, conv in enumerate(self.filter_list):\n", " # conv : [input_channel(=1), output_channel(=3), (filter_height, filter_width), bias_option]\n", " h = F.relu(conv(embedded_chars))\n", " # mp : ((filter_height, filter_width))\n", " mp = nn.MaxPool2d((sequence_length - filter_sizes[i] + 1, 1))\n", " # pooled : [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3)]\n", " pooled = mp(h).permute(0, 3, 2, 1)\n", " pooled_outputs.append(pooled)\n", "\n", " h_pool = torch.cat(pooled_outputs, len(filter_sizes)) # [batch_size(=6), output_height(=1), output_width(=1), output_channel(=3) * 3]\n", " h_pool_flat = torch.reshape(h_pool, [-1, self.num_filters_total]) # [batch_size(=6), output_height * output_width * (output_channel * 3)]\n", " model = self.Weight(h_pool_flat) + self.Bias # [batch_size, num_classes]\n", " return model\n", "\n", "if __name__ == '__main__':\n", " embedding_size = 2 # embedding size\n", " sequence_length = 3 # sequence length\n", " num_classes = 2 # number of classes\n", " filter_sizes = [2, 2, 2] # n-gram windows\n", " num_filters = 3 # number of filters\n", "\n", " # 3 words sentences (=sequence_length is 3)\n", " sentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \"i hate you\", \"sorry for that\", \"this is awful\"]\n", " labels = [1, 1, 1, 0, 0, 0] # 1 is good, 0 is not good.\n", "\n", " word_list = \" \".join(sentences).split()\n", " word_list = list(set(word_list))\n", " word_dict = {w: i for i, w in enumerate(word_list)}\n", " vocab_size = len(word_dict)\n", "\n", " model = TextCNN()\n", "\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = optim.Adam(model.parameters(), lr=0.001)\n", "\n", " inputs = torch.LongTensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences])\n", " targets = torch.LongTensor([out for out in labels]) # To using Torch Softmax Loss function\n", "\n", " # Training\n", " for epoch in 
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


class TextCNN(nn.Module):
    """Text classifier: embedding -> parallel convolutions -> max pooling -> linear.

    The hyper-parameters ``vocab_size``, ``embedding_size``, ``num_filters``,
    ``filter_sizes`` and ``num_classes`` are read from module-level globals at
    construction time, so they must be defined before ``TextCNN()`` is called.
    """

    def __init__(self):
        super(TextCNN, self).__init__()
        self.num_filters_total = num_filters * len(filter_sizes)
        self.W = nn.Embedding(vocab_size, embedding_size)
        self.Weight = nn.Linear(self.num_filters_total, num_classes, bias=False)
        self.Bias = nn.Parameter(torch.ones([num_classes]))
        # One convolution per window size; each kernel spans the full embedding width.
        self.filter_list = nn.ModuleList(
            [nn.Conv2d(1, num_filters, (size, embedding_size)) for size in filter_sizes]
        )

    def forward(self, X):
        """Return class logits for a batch of token-id sequences.

        Args:
            X: LongTensor of token ids, [batch_size, sequence_length]. The
                sequence length must be at least the largest filter size.

        Returns:
            Tensor of logits, [batch_size, num_classes].
        """
        embedded_chars = self.W(X)  # [batch_size, sequence_length, embedding_size]
        embedded_chars = embedded_chars.unsqueeze(1)  # [batch_size, channel(=1), sequence_length, embedding_size]

        pooled_outputs = []
        for conv in self.filter_list:
            # h : [batch_size, num_filters, sequence_length - filter_size + 1, 1]
            h = F.relu(conv(embedded_chars))
            # Max over the whole height of the feature map. Taking the height
            # from h itself keeps this correct for any input length instead of
            # relying on a global sequence_length.
            pooled = F.max_pool2d(h, (h.size(2), 1))  # [batch_size, num_filters, 1, 1]
            pooled_outputs.append(pooled.flatten(1))  # [batch_size, num_filters]

        # Concatenate along the feature axis. The axis is fixed (dim=1); it must
        # not depend on how many filter sizes are configured.
        h_pool_flat = torch.cat(pooled_outputs, dim=1)  # [batch_size, num_filters_total]
        return self.Weight(h_pool_flat) + self.Bias  # [batch_size, num_classes]


if __name__ == '__main__':
    embedding_size = 2  # embedding size
    sequence_length = 3  # words per training sentence (forward() itself accepts any length)
    num_classes = 2  # number of classes
    filter_sizes = [2, 2, 2]  # n-gram windows
    num_filters = 3  # number of filters

    # 3 words sentences (=sequence_length is 3)
    sentences = ["i love you", "he loves me", "she likes baseball", "i hate you", "sorry for that", "this is awful"]
    labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.

    # sorted() makes the word -> id mapping reproducible between runs.
    word_list = sorted(set(" ".join(sentences).split()))
    word_dict = {w: i for i, w in enumerate(word_list)}
    vocab_size = len(word_dict)

    model = TextCNN()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    inputs = torch.tensor([[word_dict[n] for n in sen.split()] for sen in sentences], dtype=torch.long)
    targets = torch.tensor(labels, dtype=torch.long)  # class indices, as CrossEntropyLoss expects

    # Training
    for epoch in range(5000):
        optimizer.zero_grad()
        output = model(inputs)

        # output : [batch_size, num_classes], targets : [batch_size] (LongTensor, not one-hot)
        loss = criterion(output, targets)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    # Test
    test_text = 'sorry hate you'
    test_batch = torch.tensor([[word_dict[n] for n in test_text.split()]], dtype=torch.long)

    # Predict
    with torch.no_grad():
        predict = model(test_batch).argmax(dim=1)
    if predict[0].item() == 0:
        print(test_text,"is Bad Mean...")
    else:
        print(test_text,"is Good Mean!!")
= nn.RNN(input_size=n_class, hidden_size=n_hidden)\n", " self.W = nn.Linear(n_hidden, n_class, bias=False)\n", " self.b = nn.Parameter(torch.ones([n_class]))\n", "\n", " def forward(self, hidden, X):\n", " X = X.transpose(0, 1) # X : [n_step, batch_size, n_class]\n", " outputs, hidden = self.rnn(X, hidden)\n", " # outputs : [n_step, batch_size, num_directions(=1) * n_hidden]\n", " # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n", " outputs = outputs[-1] # [batch_size, num_directions(=1) * n_hidden]\n", " model = self.W(outputs) + self.b # model : [batch_size, n_class]\n", " return model\n", "\n", "if __name__ == '__main__':\n", " n_step = 2 # number of cells(= number of Step)\n", " n_hidden = 5 # number of hidden units in one cell\n", "\n", " sentences = [\"i like dog\", \"i love coffee\", \"i hate milk\"]\n", "\n", " word_list = \" \".join(sentences).split()\n", " word_list = list(set(word_list))\n", " word_dict = {w: i for i, w in enumerate(word_list)}\n", " number_dict = {i: w for i, w in enumerate(word_list)}\n", " n_class = len(word_dict)\n", " batch_size = len(sentences)\n", "\n", " model = TextRNN()\n", "\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = optim.Adam(model.parameters(), lr=0.001)\n", "\n", " input_batch, target_batch = make_batch()\n", " input_batch = torch.FloatTensor(input_batch)\n", " target_batch = torch.LongTensor(target_batch)\n", "\n", " # Training\n", " for epoch in range(5000):\n", " optimizer.zero_grad()\n", "\n", " # hidden : [num_layers * num_directions, batch, hidden_size]\n", " hidden = torch.zeros(1, batch_size, n_hidden)\n", " # input_batch : [batch_size, n_step, n_class]\n", " output = model(hidden, input_batch)\n", "\n", " # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)\n", " loss = criterion(output, target_batch)\n", " if (epoch + 1) % 1000 == 0:\n", " print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n", "\n", " loss.backward()\n", 
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


def make_batch():
    """Build next-word training pairs from the global ``sentences``.

    Every sentence is split on whitespace; all words but the last form the
    input and the last word is the target. Reads the module-level globals
    ``sentences``, ``word_dict`` and ``n_class``.

    Returns:
        (input_batch, target_batch): a list of one-hot arrays, each
        [n_words - 1, n_class], and a list of target word ids.
    """
    input_batch = []
    target_batch = []
    one_hot = np.eye(n_class)  # row i is the one-hot vector of word id i

    for sen in sentences:
        word = sen.split()  # space tokenizer
        context = [word_dict[n] for n in word[:-1]]  # words 1..n-1 are the input
        target = word_dict[word[-1]]  # word n is the target (next-word prediction)

        input_batch.append(one_hot[context])
        target_batch.append(target)

    return input_batch, target_batch


class TextRNN(nn.Module):
    """Single-layer RNN that predicts the next word from one-hot inputs.

    ``n_class`` and ``n_hidden`` are read from module-level globals when the
    model is constructed.
    """

    def __init__(self):
        super(TextRNN, self).__init__()
        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        self.W = nn.Linear(n_hidden, n_class, bias=False)
        self.b = nn.Parameter(torch.ones([n_class]))

    def forward(self, hidden, X):
        """Return next-word logits.

        Args:
            hidden: initial hidden state, [num_layers(=1) * num_directions(=1), batch_size, n_hidden].
            X: one-hot inputs, [batch_size, n_step, n_class].

        Returns:
            Logits, [batch_size, n_class].
        """
        X = X.transpose(0, 1)  # nn.RNN is time-major: [n_step, batch_size, n_class]
        outputs, hidden = self.rnn(X, hidden)
        # outputs : [n_step, batch_size, num_directions(=1) * n_hidden]
        # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        last_output = outputs[-1]  # output of the final time step, [batch_size, n_hidden]
        return self.W(last_output) + self.b  # [batch_size, n_class]


if __name__ == '__main__':
    n_step = 2  # number of cells(= number of Step)
    n_hidden = 5  # number of hidden units in one cell

    sentences = ["i like dog", "i love coffee", "i hate milk"]

    # sorted() makes the word -> id mapping reproducible between runs.
    word_list = sorted(set(" ".join(sentences).split()))
    word_dict = {w: i for i, w in enumerate(word_list)}
    number_dict = {i: w for i, w in enumerate(word_list)}
    n_class = len(word_dict)
    batch_size = len(sentences)

    model = TextRNN()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    input_batch, target_batch = make_batch()
    # Stack into one ndarray first: building a tensor from a list of ndarrays is slow.
    input_batch = torch.tensor(np.array(input_batch), dtype=torch.float)
    target_batch = torch.tensor(target_batch, dtype=torch.long)

    # Training
    for epoch in range(5000):
        optimizer.zero_grad()

        # hidden : [num_layers * num_directions, batch, hidden_size]
        hidden = torch.zeros(1, batch_size, n_hidden)
        # input_batch : [batch_size, n_step, n_class]
        output = model(hidden, input_batch)

        # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
        loss = criterion(output, target_batch)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    # Predict
    hidden = torch.zeros(1, batch_size, n_hidden)
    with torch.no_grad():
        predict = model(hidden, input_batch).argmax(dim=1)  # [batch_size]
    contexts = [sen.split()[:n_step] for sen in sentences]
    print(contexts, '->', [number_dict[n.item()] for n in predict])
= word_dict[seq[-1]] # 'e' is target\n", " input_batch.append(np.eye(n_class)[input])\n", " target_batch.append(target)\n", "\n", " return input_batch, target_batch\n", "\n", "class TextLSTM(nn.Module):\n", " def __init__(self):\n", " super(TextLSTM, self).__init__()\n", "\n", " self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)\n", " self.W = nn.Linear(n_hidden, n_class, bias=False)\n", " self.b = nn.Parameter(torch.ones([n_class]))\n", "\n", " def forward(self, X):\n", " input = X.transpose(0, 1) # X : [n_step, batch_size, n_class]\n", "\n", " hidden_state = torch.zeros(1, len(X), n_hidden) # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n", " cell_state = torch.zeros(1, len(X), n_hidden) # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n", "\n", " outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))\n", " outputs = outputs[-1] # [batch_size, n_hidden]\n", " model = self.W(outputs) + self.b # model : [batch_size, n_class]\n", " return model\n", "\n", "if __name__ == '__main__':\n", " n_step = 3 # number of cells(= number of Step)\n", " n_hidden = 128 # number of hidden units in one cell\n", "\n", " char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']\n", " word_dict = {n: i for i, n in enumerate(char_arr)}\n", " number_dict = {i: w for i, w in enumerate(char_arr)}\n", " n_class = len(word_dict) # number of class(=number of vocab)\n", "\n", " seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']\n", "\n", " model = TextLSTM()\n", "\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = optim.Adam(model.parameters(), lr=0.001)\n", "\n", " input_batch, target_batch = make_batch()\n", " input_batch = torch.FloatTensor(input_batch)\n", " target_batch = torch.LongTensor(target_batch)\n", "\n", " # Training\n", " for epoch in range(1000):\n", " optimizer.zero_grad()\n", "\n", " output = model(input_batch)\n", " loss = criterion(output, target_batch)\n", " if (epoch + 1) % 100 == 
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


def make_batch():
    """Build next-character training pairs from the global ``seq_data``.

    For each word, all characters but the last are the input and the last
    character is the target. Reads the module-level globals ``seq_data``,
    ``word_dict`` and ``n_class``.

    Returns:
        (input_batch, target_batch): a list of one-hot arrays, each
        [len(word) - 1, n_class], and a list of target character ids.
    """
    input_batch, target_batch = [], []
    one_hot = np.eye(n_class)  # row i is the one-hot vector of character id i

    for seq in seq_data:
        chars = [word_dict[n] for n in seq[:-1]]  # 'm', 'a' , 'k' is input
        target = word_dict[seq[-1]]  # 'e' is target
        input_batch.append(one_hot[chars])
        target_batch.append(target)

    return input_batch, target_batch


class TextLSTM(nn.Module):
    """Single-layer LSTM that predicts the last character of a word.

    ``n_class`` and ``n_hidden`` are read from module-level globals when the
    model is constructed.
    """

    def __init__(self):
        super(TextLSTM, self).__init__()

        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)
        self.W = nn.Linear(n_hidden, n_class, bias=False)
        self.b = nn.Parameter(torch.ones([n_class]))

    def forward(self, X):
        """Return next-character logits.

        Args:
            X: one-hot inputs, [batch_size, n_step, n_class].

        Returns:
            Logits, [batch_size, n_class].
        """
        inputs = X.transpose(0, 1)  # nn.LSTM is time-major: [n_step, batch_size, n_class]

        # No explicit (h_0, c_0): nn.LSTM then starts from zero states that
        # match the dtype and device of the input, which hand-built
        # torch.zeros(...) states do not.
        outputs, _ = self.lstm(inputs)
        last_output = outputs[-1]  # output of the final time step, [batch_size, n_hidden]
        return self.W(last_output) + self.b  # [batch_size, n_class]


if __name__ == '__main__':
    n_step = 3  # number of cells(= number of Step)
    n_hidden = 128  # number of hidden units in one cell

    char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']
    word_dict = {n: i for i, n in enumerate(char_arr)}
    number_dict = {i: w for i, w in enumerate(char_arr)}
    n_class = len(word_dict)  # number of class(=number of vocab)

    seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']

    model = TextLSTM()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    input_batch, target_batch = make_batch()
    # Stack into one ndarray first: building a tensor from a list of ndarrays is slow.
    input_batch = torch.tensor(np.array(input_batch), dtype=torch.float)
    target_batch = torch.tensor(target_batch, dtype=torch.long)

    # Training
    for epoch in range(1000):
        optimizer.zero_grad()

        output = model(input_batch)
        loss = criterion(output, target_batch)
        if (epoch + 1) % 100 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    inputs = [sen[:n_step] for sen in seq_data]

    with torch.no_grad():
        predict = model(input_batch).argmax(dim=1)  # [batch_size]
    print(inputs, '->', [number_dict[n.item()] for n in predict])
"\n", " self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)\n", " self.W = nn.Linear(n_hidden * 2, n_class, bias=False)\n", " self.b = nn.Parameter(torch.ones([n_class]))\n", "\n", " def forward(self, X):\n", " input = X.transpose(0, 1) # input : [n_step, batch_size, n_class]\n", "\n", " hidden_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n", " cell_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n", "\n", " outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))\n", " outputs = outputs[-1] # [batch_size, n_hidden]\n", " model = self.W(outputs) + self.b # model : [batch_size, n_class]\n", " return model\n", "\n", "if __name__ == '__main__':\n", " n_hidden = 5 # number of hidden units in one cell\n", "\n", " sentence = (\n", " 'Lorem ipsum dolor sit amet consectetur adipisicing elit '\n", " 'sed do eiusmod tempor incididunt ut labore et dolore magna '\n", " 'aliqua Ut enim ad minim veniam quis nostrud exercitation'\n", " )\n", "\n", " word_dict = {w: i for i, w in enumerate(list(set(sentence.split())))}\n", " number_dict = {i: w for i, w in enumerate(list(set(sentence.split())))}\n", " n_class = len(word_dict)\n", " max_len = len(sentence.split())\n", "\n", " model = BiLSTM()\n", "\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = optim.Adam(model.parameters(), lr=0.001)\n", "\n", " input_batch, target_batch = make_batch()\n", " input_batch = torch.FloatTensor(input_batch)\n", " target_batch = torch.LongTensor(target_batch)\n", "\n", " # Training\n", " for epoch in range(10000):\n", " optimizer.zero_grad()\n", " output = model(input_batch)\n", " loss = criterion(output, target_batch)\n", " if (epoch + 1) % 1000 == 0:\n", " print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n", "\n", " loss.backward()\n", " optimizer.step()\n", "\n", " predict = model(input_batch).data.max(1, 
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


def make_batch():
    """Build (prefix -> next word) training pairs from the global ``sentence``.

    For every position i the input is the first i + 1 words, right-padded to
    ``max_len`` and one-hot encoded; the target is word i + 1. Reads the
    module-level globals ``sentence``, ``word_dict``, ``max_len`` and
    ``n_class``.

    Returns:
        (input_batch, target_batch): a list of one-hot arrays, each
        [max_len, n_class], and a list of target word ids.
    """
    input_batch = []
    target_batch = []
    one_hot = np.eye(n_class)  # row i is the one-hot vector of word id i

    words = sentence.split()
    for i in range(len(words) - 1):
        prefix = [word_dict[n] for n in words[:(i + 1)]]
        # NOTE: id 0 is used as padding although it is also the id of a real
        # word; kept as-is so the vocabulary size stays unchanged.
        prefix = prefix + [0] * (max_len - len(prefix))
        target = word_dict[words[i + 1]]
        input_batch.append(one_hot[prefix])
        target_batch.append(target)

    return input_batch, target_batch


class BiLSTM(nn.Module):
    """Single-layer bidirectional LSTM that predicts the next word.

    ``n_class`` and ``n_hidden`` are read from module-level globals when the
    model is constructed.
    """

    def __init__(self):
        super(BiLSTM, self).__init__()

        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)
        # Forward and backward outputs are concatenated, hence n_hidden * 2.
        self.W = nn.Linear(n_hidden * 2, n_class, bias=False)
        self.b = nn.Parameter(torch.ones([n_class]))

    def forward(self, X):
        """Return next-word logits.

        Args:
            X: one-hot inputs, [batch_size, n_step, n_class].

        Returns:
            Logits, [batch_size, n_class].
        """
        inputs = X.transpose(0, 1)  # nn.LSTM is time-major: [n_step, batch_size, n_class]

        # No explicit (h_0, c_0): nn.LSTM then starts from zero states that
        # match the dtype and device of the input, which hand-built
        # torch.zeros(...) states do not.
        outputs, _ = self.lstm(inputs)
        last_output = outputs[-1]  # final time step, [batch_size, num_directions(=2) * n_hidden]
        return self.W(last_output) + self.b  # [batch_size, n_class]


if __name__ == '__main__':
    n_hidden = 5  # number of hidden units in one cell

    sentence = (
        'Lorem ipsum dolor sit amet consectetur adipisicing elit '
        'sed do eiusmod tempor incididunt ut labore et dolore magna '
        'aliqua Ut enim ad minim veniam quis nostrud exercitation'
    )

    # Build the vocabulary once so both lookup tables are guaranteed to agree;
    # sorted() also makes the ids reproducible between runs.
    word_list = sorted(set(sentence.split()))
    word_dict = {w: i for i, w in enumerate(word_list)}
    number_dict = {i: w for i, w in enumerate(word_list)}
    n_class = len(word_dict)
    max_len = len(sentence.split())

    model = BiLSTM()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    input_batch, target_batch = make_batch()
    # Stack into one ndarray first: building a tensor from a list of ndarrays is slow.
    input_batch = torch.tensor(np.array(input_batch), dtype=torch.float)
    target_batch = torch.tensor(target_batch, dtype=torch.long)

    # Training
    for epoch in range(10000):
        optimizer.zero_grad()
        output = model(input_batch)
        loss = criterion(output, target_batch)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    with torch.no_grad():
        predict = model(input_batch).argmax(dim=1)  # [batch_size]
    print(sentence)
    print([number_dict[n.item()] for n in predict])
target_batch.append(target) # not one-hot\n", "\n", " # make tensor\n", " return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch)\n", "\n", "# make test batch\n", "def make_testbatch(input_word):\n", " input_batch, output_batch = [], []\n", "\n", " input_w = input_word + 'P' * (n_step - len(input_word))\n", " input = [num_dic[n] for n in input_w]\n", " output = [num_dic[n] for n in 'S' + 'P' * n_step]\n", "\n", " input_batch = np.eye(n_class)[input]\n", " output_batch = np.eye(n_class)[output]\n", "\n", " return torch.FloatTensor(input_batch).unsqueeze(0), torch.FloatTensor(output_batch).unsqueeze(0)\n", "\n", "# Model\n", "class Seq2Seq(nn.Module):\n", " def __init__(self):\n", " super(Seq2Seq, self).__init__()\n", "\n", " self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n", " self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n", " self.fc = nn.Linear(n_hidden, n_class)\n", "\n", " def forward(self, enc_input, enc_hidden, dec_input):\n", " enc_input = enc_input.transpose(0, 1) # enc_input: [max_len(=n_step, time step), batch_size, n_class]\n", " dec_input = dec_input.transpose(0, 1) # dec_input: [max_len(=n_step, time step), batch_size, n_class]\n", "\n", " # enc_states : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n", " _, enc_states = self.enc_cell(enc_input, enc_hidden)\n", " # outputs : [max_len+1(=6), batch_size, num_directions(=1) * n_hidden(=128)]\n", " outputs, _ = self.dec_cell(dec_input, enc_states)\n", "\n", " model = self.fc(outputs) # model : [max_len+1(=6), batch_size, n_class]\n", " return model\n", "\n", "if __name__ == '__main__':\n", " n_step = 5\n", " n_hidden = 128\n", "\n", " char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']\n", " num_dic = {n: i for i, n in enumerate(char_arr)}\n", " seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]\n", "\n", " 
n_class = len(num_dic)\n", " batch_size = len(seq_data)\n", "\n", " model = Seq2Seq()\n", "\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n", "\n", " input_batch, output_batch, target_batch = make_batch()\n", "\n", " for epoch in range(5000):\n", " # make hidden shape [num_layers * num_directions, batch_size, n_hidden]\n", " hidden = torch.zeros(1, batch_size, n_hidden)\n", "\n", " optimizer.zero_grad()\n", " # input_batch : [batch_size, max_len(=n_step, time step), n_class]\n", " # output_batch : [batch_size, max_len+1(=n_step, time step) (becase of 'S' or 'E'), n_class]\n", " # target_batch : [batch_size, max_len+1(=n_step, time step)], not one-hot\n", " output = model(input_batch, hidden, output_batch)\n", " # output : [max_len+1, batch_size, n_class]\n", " output = output.transpose(0, 1) # [batch_size, max_len+1(=6), n_class]\n", " loss = 0\n", " for i in range(0, len(target_batch)):\n", " # output[i] : [max_len+1, n_class, target_batch[i] : max_len+1]\n", " loss += criterion(output[i], target_batch[i])\n", " if (epoch + 1) % 1000 == 0:\n", " print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # Test\n", " def translate(word):\n", " input_batch, output_batch = make_testbatch(word)\n", "\n", " # make hidden shape [num_layers * num_directions, batch_size, n_hidden]\n", " hidden = torch.zeros(1, 1, n_hidden)\n", " output = model(input_batch, hidden, output_batch)\n", " # output : [max_len+1(=6), batch_size(=1), n_class]\n", "\n", " predict = output.data.max(2, keepdim=True)[1] # select n_class dimension\n", " decoded = [char_arr[i] for i in predict]\n", " end = decoded.index('E')\n", " translated = ''.join(decoded[:end])\n", "\n", " return translated.replace('P', '')\n", "\n", " print('test')\n", " print('man ->', translate('man'))\n", " print('mans ->', translate('mans'))\n", " print('king ->', translate('king'))\n", " 
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn

# S: Symbol that marks the start of the decoder input
# E: Symbol that marks the end of the decoder output
# P: Padding symbol appended to sequences shorter than n_step


def make_batch():
    """Build the training tensors from the global ``seq_data``.

    Each ``[source, target]`` pair is right-padded with 'P' to ``n_step``
    characters. The decoder input is 'S' + target and the decoder target is
    target + 'E'. Reads the module-level globals ``seq_data``, ``num_dic``,
    ``n_step`` and ``n_class``. ``seq_data`` itself is left unmodified.

    Returns:
        (input_batch, output_batch, target_batch):
        FloatTensor [batch_size, n_step, n_class] (one-hot encoder input),
        FloatTensor [batch_size, n_step + 1, n_class] (one-hot decoder input),
        LongTensor [batch_size, n_step + 1] (target character ids).
    """
    input_batch, output_batch, target_batch = [], [], []
    one_hot = np.eye(n_class)  # row i is the one-hot vector of character id i

    for seq in seq_data:
        # Pad into locals; writing back into seq would alter the caller's data.
        source = seq[0] + 'P' * (n_step - len(seq[0]))
        answer = seq[1] + 'P' * (n_step - len(seq[1]))

        enc_ids = [num_dic[n] for n in source]
        dec_ids = [num_dic[n] for n in ('S' + answer)]
        target = [num_dic[n] for n in (answer + 'E')]

        input_batch.append(one_hot[enc_ids])
        output_batch.append(one_hot[dec_ids])
        target_batch.append(target)  # not one-hot

    # Stack into one ndarray first: building a tensor from a list of ndarrays is slow.
    return (
        torch.tensor(np.array(input_batch), dtype=torch.float),
        torch.tensor(np.array(output_batch), dtype=torch.float),
        torch.tensor(target_batch, dtype=torch.long),
    )


# make test batch
def make_testbatch(input_word):
    """Build a one-sample inference batch for ``input_word``.

    The word is right-padded with 'P' to ``n_step``; the decoder input is 'S'
    followed by ``n_step`` padding symbols. Reads the module-level globals
    ``num_dic``, ``n_step`` and ``n_class``.

    Returns:
        (input_batch, output_batch): one-hot FloatTensors with a leading batch
        dimension of 1; the decoder input is [1, n_step + 1, n_class].
    """
    padded_word = input_word + 'P' * (n_step - len(input_word))
    enc_ids = [num_dic[n] for n in padded_word]
    dec_ids = [num_dic[n] for n in 'S' + 'P' * n_step]

    one_hot = np.eye(n_class)
    input_batch = torch.tensor(one_hot[enc_ids], dtype=torch.float)
    output_batch = torch.tensor(one_hot[dec_ids], dtype=torch.float)

    return input_batch.unsqueeze(0), output_batch.unsqueeze(0)


# Model
class Seq2Seq(nn.Module):
    """RNN encoder-decoder over one-hot characters.

    ``n_class`` and ``n_hidden`` are read from module-level globals when the
    model is constructed.
    """

    def __init__(self):
        super(Seq2Seq, self).__init__()

        # nn.RNN applies dropout only between stacked layers, so on these
        # single-layer cells dropout=0.5 did nothing except emit a warning.
        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        self.fc = nn.Linear(n_hidden, n_class)

    def forward(self, enc_input, enc_hidden, dec_input):
        """Return per-step character logits.

        Args:
            enc_input: [batch_size, n_step, n_class] one-hot encoder input.
            enc_hidden: [num_layers(=1) * num_directions(=1), batch_size, n_hidden] initial state.
            dec_input: [batch_size, n_step + 1, n_class] one-hot decoder input.

        Returns:
            Logits, time-major: [n_step + 1, batch_size, n_class].
        """
        enc_input = enc_input.transpose(0, 1)  # [n_step, batch_size, n_class]
        dec_input = dec_input.transpose(0, 1)  # [n_step + 1, batch_size, n_class]

        # enc_states : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        _, enc_states = self.enc_cell(enc_input, enc_hidden)
        # The decoder starts from the encoder's final state.
        # outputs : [n_step + 1, batch_size, num_directions(=1) * n_hidden]
        outputs, _ = self.dec_cell(dec_input, enc_states)

        return self.fc(outputs)  # [n_step + 1, batch_size, n_class]


if __name__ == '__main__':
    n_step = 5
    n_hidden = 128

    char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']
    num_dic = {n: i for i, n in enumerate(char_arr)}
    seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']]

    n_class = len(num_dic)
    batch_size = len(seq_data)

    model = Seq2Seq()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    input_batch, output_batch, target_batch = make_batch()

    for epoch in range(5000):
        # make hidden shape [num_layers * num_directions, batch_size, n_hidden]
        hidden = torch.zeros(1, batch_size, n_hidden)

        optimizer.zero_grad()
        # input_batch : [batch_size, n_step, n_class]
        # output_batch : [batch_size, n_step + 1 (because of 'S'), n_class]
        # target_batch : [batch_size, n_step + 1 (because of 'E')], not one-hot
        output = model(input_batch, hidden, output_batch)
        # output : [n_step + 1, batch_size, n_class]
        output = output.transpose(0, 1)  # [batch_size, n_step + 1, n_class]
        # Sum of the per-sequence mean losses.
        # seq_logits : [n_step + 1, n_class], seq_target : [n_step + 1]
        loss = sum(criterion(seq_logits, seq_target) for seq_logits, seq_target in zip(output, target_batch))
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))
        loss.backward()
        optimizer.step()

    # Test
    def translate(word):
        """Decode ``word`` with the trained model and strip the special symbols."""
        enc_input, dec_input = make_testbatch(word)

        # make hidden shape [num_layers * num_directions, batch_size, n_hidden]
        hidden = torch.zeros(1, 1, n_hidden)
        with torch.no_grad():
            output = model(enc_input, hidden, dec_input)
        # output : [n_step + 1, batch_size(=1), n_class]

        predict = output.argmax(dim=2).squeeze(1).tolist()  # most likely character id per step
        decoded = ''.join(char_arr[i] for i in predict)
        # Cut at the first 'E'. If the model never emitted one, keep the whole
        # string instead of raising ValueError.
        end = decoded.find('E')
        if end != -1:
            decoded = decoded[:end]

        return decoded.replace('P', '')

    print('test')
    print('man ->', translate('man'))
    print('mans ->', translate('mans'))
    print('king ->', translate('king'))
    print('black ->', translate('black'))
    print('upp ->', translate('upp'))
super(Attention, self).__init__()\n", " self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n", " self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)\n", "\n", " # Linear for attention\n", " self.attn = nn.Linear(n_hidden, n_hidden)\n", " self.out = nn.Linear(n_hidden * 2, n_class)\n", "\n", " def forward(self, enc_inputs, hidden, dec_inputs):\n", " enc_inputs = enc_inputs.transpose(0, 1) # enc_inputs: [n_step(=n_step, time step), batch_size, n_class]\n", " dec_inputs = dec_inputs.transpose(0, 1) # dec_inputs: [n_step(=n_step, time step), batch_size, n_class]\n", "\n", " # enc_outputs : [n_step, batch_size, num_directions(=1) * n_hidden], matrix F\n", " # enc_hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n", " enc_outputs, enc_hidden = self.enc_cell(enc_inputs, hidden)\n", "\n", " trained_attn = []\n", " hidden = enc_hidden\n", " n_step = len(dec_inputs)\n", " model = torch.empty([n_step, 1, n_class])\n", "\n", " for i in range(n_step): # each time step\n", " # dec_output : [n_step(=1), batch_size(=1), num_directions(=1) * n_hidden]\n", " # hidden : [num_layers(=1) * num_directions(=1), batch_size(=1), n_hidden]\n", " dec_output, hidden = self.dec_cell(dec_inputs[i].unsqueeze(0), hidden)\n", " attn_weights = self.get_att_weight(dec_output, enc_outputs) # attn_weights : [1, 1, n_step]\n", " trained_attn.append(attn_weights.squeeze().data.numpy())\n", "\n", " # matrix-matrix product of matrices [1,1,n_step] x [1,n_step,n_hidden] = [1,1,n_hidden]\n", " context = attn_weights.bmm(enc_outputs.transpose(0, 1))\n", " dec_output = dec_output.squeeze(0) # dec_output : [batch_size(=1), num_directions(=1) * n_hidden]\n", " context = context.squeeze(1) # [1, num_directions(=1) * n_hidden]\n", " model[i] = self.out(torch.cat((dec_output, context), 1))\n", "\n", " # make model shape [n_step, n_class]\n", " return model.transpose(0, 1).squeeze(0), trained_attn\n", "\n", " def get_att_weight(self, 
dec_output, enc_outputs): # get attention weight one 'dec_output' with 'enc_outputs'\n", " n_step = len(enc_outputs)\n", " attn_scores = torch.zeros(n_step) # attn_scores : [n_step]\n", "\n", " for i in range(n_step):\n", " attn_scores[i] = self.get_att_score(dec_output, enc_outputs[i])\n", "\n", " # Normalize scores to weights in range 0 to 1\n", " return F.softmax(attn_scores).view(1, 1, -1)\n", "\n", " def get_att_score(self, dec_output, enc_output): # enc_outputs [batch_size, num_directions(=1) * n_hidden]\n", " score = self.attn(enc_output) # score : [batch_size, n_hidden]\n", " return torch.dot(dec_output.view(-1), score.view(-1)) # inner product make scalar value\n", "\n", "if __name__ == '__main__':\n", " n_step = 5 # number of cells(= number of Step)\n", " n_hidden = 128 # number of hidden units in one cell\n", "\n", " sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\n", "\n", " word_list = \" \".join(sentences).split()\n", " word_list = list(set(word_list))\n", " word_dict = {w: i for i, w in enumerate(word_list)}\n", " number_dict = {i: w for i, w in enumerate(word_list)}\n", " n_class = len(word_dict) # vocab list\n", "\n", " # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]\n", " hidden = torch.zeros(1, 1, n_hidden)\n", "\n", " model = Attention()\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n", "\n", " input_batch, output_batch, target_batch = make_batch()\n", "\n", " # Train\n", " for epoch in range(2000):\n", " optimizer.zero_grad()\n", " output, _ = model(input_batch, hidden, output_batch)\n", "\n", " loss = criterion(output, target_batch.squeeze(0))\n", " if (epoch + 1) % 400 == 0:\n", " print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n", "\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # Test\n", " test_batch = [np.eye(n_class)[[word_dict[n] for n in 'SPPPP']]]\n", " test_batch = 
torch.FloatTensor(test_batch)\n", " predict, trained_attn = model(input_batch, hidden, test_batch)\n", " predict = predict.data.max(1, keepdim=True)[1]\n", " print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\n", "\n", " # Show Attention\n", " fig = plt.figure(figsize=(5, 5))\n", " ax = fig.add_subplot(1, 1, 1)\n", " ax.matshow(trained_attn, cmap='viridis')\n", " ax.set_xticklabels([''] + sentences[0].split(), fontdict={'fontsize': 14})\n", " ax.set_yticklabels([''] + sentences[2].split(), fontdict={'fontsize': 14})\n", " plt.show()" ], "outputs": [], "execution_count": null } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: 4-2.Seq2Seq(Attention)/Seq2Seq(Attention).py ================================================ # %% # code by Tae Hwan Jung @graykode # Reference : https://github.com/hunkim/PyTorchZeroToAll/blob/master/14_2_seq2seq_att.py import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import matplotlib.pyplot as plt # S: Symbol that shows starting of decoding input # E: Symbol that shows starting of decoding output # P: Symbol that will fill in blank sequence if current batch data size is short than time steps def make_batch(): input_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[0].split()]]] output_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[1].split()]]] target_batch = [[word_dict[n] for n in sentences[2].split()]] # make tensor return torch.FloatTensor(input_batch), torch.FloatTensor(output_batch), torch.LongTensor(target_batch) class Attention(nn.Module): def 
class Attention(nn.Module):
    """Toy RNN encoder/decoder with dot-product attention over encoder outputs.

    Reads the module-level globals ``n_class`` (vocabulary size) and
    ``n_hidden`` (RNN hidden size). The attention score flattens the decoder
    and encoder states into vectors, so the model only supports batch_size 1.
    """

    def __init__(self):
        super(Attention, self).__init__()
        # Single-layer RNNs: nn.RNN only applies dropout *between* stacked
        # layers, so passing dropout=0.5 here did nothing except raise a
        # UserWarning. It is omitted; the computation is unchanged.
        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden)

        # Linear for attention
        self.attn = nn.Linear(n_hidden, n_hidden)
        self.out = nn.Linear(n_hidden * 2, n_class)

    def forward(self, enc_inputs, hidden, dec_inputs):
        """Run the encoder, then decode step by step with attention.

        :param enc_inputs: [batch_size(=1), n_step, n_class] one-hot encoder input
        :param hidden: [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        :param dec_inputs: [batch_size(=1), n_step, n_class] one-hot decoder input
        :return: (logits of shape [n_step, n_class],
                  list with one attention-weight numpy array per decoder step)
        """
        enc_inputs = enc_inputs.transpose(0, 1)  # enc_inputs: [n_step, batch_size, n_class]
        dec_inputs = dec_inputs.transpose(0, 1)  # dec_inputs: [n_step, batch_size, n_class]

        # enc_outputs : [n_step, batch_size, num_directions(=1) * n_hidden], matrix F
        # enc_hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        enc_outputs, enc_hidden = self.enc_cell(enc_inputs, hidden)

        trained_attn = []
        step_logits = []
        hidden = enc_hidden

        for dec_input in dec_inputs:  # each time step
            # dec_output : [n_step(=1), batch_size(=1), num_directions(=1) * n_hidden]
            # hidden : [num_layers(=1) * num_directions(=1), batch_size(=1), n_hidden]
            dec_output, hidden = self.dec_cell(dec_input.unsqueeze(0), hidden)
            attn_weights = self.get_att_weight(dec_output, enc_outputs)  # attn_weights : [1, 1, n_step]
            trained_attn.append(attn_weights.squeeze().detach().numpy())

            # matrix-matrix product of matrices [1,1,n_step] x [1,n_step,n_hidden] = [1,1,n_hidden]
            context = attn_weights.bmm(enc_outputs.transpose(0, 1))
            dec_output = dec_output.squeeze(0)  # dec_output : [batch_size(=1), num_directions(=1) * n_hidden]
            context = context.squeeze(1)  # [1, num_directions(=1) * n_hidden]
            step_logits.append(self.out(torch.cat((dec_output, context), 1)))

        # Stack the per-step logits instead of writing in place into a
        # pre-allocated tensor: [n_step, 1, n_class]
        model = torch.stack(step_logits)

        # make model shape [n_step, n_class]
        return model.transpose(0, 1).squeeze(0), trained_attn

    def get_att_weight(self, dec_output, enc_outputs):
        """Return the attention weights of one 'dec_output' over 'enc_outputs', shape [1, 1, n_step]."""
        attn_scores = torch.stack(
            [self.get_att_score(dec_output, enc_output) for enc_output in enc_outputs]
        )  # attn_scores : [n_step]

        # Normalize scores to weights in range 0 to 1. dim is given explicitly;
        # the implicit-dim form of softmax is deprecated.
        return F.softmax(attn_scores, dim=0).view(1, 1, -1)

    def get_att_score(self, dec_output, enc_output):  # enc_output [batch_size, num_directions(=1) * n_hidden]
        """Return the scalar score <dec_output, attn(enc_output)> for one encoder step."""
        score = self.attn(enc_output)  # score : [batch_size, n_hidden]
        return torch.dot(dec_output.view(-1), score.view(-1))  # inner product make scalar value


if __name__ == '__main__':
    n_step = 5  # number of cells(= number of Step)
    n_hidden = 128  # number of hidden units in one cell

    sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']

    word_list = " ".join(sentences).split()
    word_list = list(set(word_list))
    word_dict = {w: i for i, w in enumerate(word_list)}
    number_dict = {i: w for i, w in enumerate(word_list)}
    n_class = len(word_dict)  # vocab list

    # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
    hidden = torch.zeros(1, 1, n_hidden)

    model = Attention()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    input_batch, output_batch, target_batch = make_batch()

    # Train
    for epoch in range(2000):
        optimizer.zero_grad()
        output, _ = model(input_batch, hidden, output_batch)

        loss = criterion(output, target_batch.squeeze(0))
        if (epoch + 1) % 400 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

        loss.backward()
        optimizer.step()

    # Test
    test_batch = [np.eye(n_class)[[word_dict[n] for n in 'SPPPP']]]
    test_batch = torch.FloatTensor(test_batch)
    predict, trained_attn = model(input_batch, hidden, test_batch)
    predict = predict.data.max(1, keepdim=True)[1]
    print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])

    # Show Attention
    fig = plt.figure(figsize=(5, 5))
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(trained_attn, cmap='viridis')
    ax.set_xticklabels([''] + sentences[0].split(), fontdict={'fontsize': 14})
    ax.set_yticklabels([''] + sentences[2].split(), fontdict={'fontsize': 14})
    plt.show()
================================================ FILE: 4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).ipynb ================================================ { "cells": [ { "cell_type": "code", "metadata": {}, "source": [ "# code by Tae Hwan Jung(Jeff Jung) @graykode\n", "# Reference : https://github.com/prakashpandey9/Text-Classification-Pytorch/blob/master/models/LSTM_Attn.py\n", "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "import torch.nn.functional as F\n", "import matplotlib.pyplot as plt\n", "\n", "class BiLSTM_Attention(nn.Module):\n", " def __init__(self):\n", " super(BiLSTM_Attention, self).__init__()\n", "\n", " self.embedding = nn.Embedding(vocab_size, embedding_dim)\n", " self.lstm = nn.LSTM(embedding_dim, n_hidden, bidirectional=True)\n", " self.out = nn.Linear(n_hidden * 2, num_classes)\n", "\n", " # lstm_output : [batch_size, n_step, n_hidden * num_directions(=2)], F matrix\n", " def attention_net(self, lstm_output, final_state):\n", " hidden = final_state.view(-1, n_hidden * 2, 1) # hidden : [batch_size, n_hidden * num_directions(=2), 1(=n_layer)]\n", " attn_weights = torch.bmm(lstm_output, hidden).squeeze(2) # attn_weights : [batch_size, n_step]\n", " soft_attn_weights = F.softmax(attn_weights, 1)\n", " # [batch_size, n_hidden * num_directions(=2), n_step] * [batch_size, n_step, 1] = [batch_size, n_hidden * num_directions(=2), 1]\n", " context = torch.bmm(lstm_output.transpose(1, 2), soft_attn_weights.unsqueeze(2)).squeeze(2)\n", " return context, soft_attn_weights.data.numpy() # context : [batch_size, n_hidden * num_directions(=2)]\n", "\n", " def forward(self, X):\n", " input = self.embedding(X) # input : [batch_size, len_seq, embedding_dim]\n", " input = input.permute(1, 0, 2) # input : [len_seq, batch_size, embedding_dim]\n", "\n", " hidden_state = torch.zeros(1*2, len(X), n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n", " cell_state = torch.zeros(1*2, len(X), 
n_hidden) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n", "\n", " # final_hidden_state, final_cell_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]\n", " output, (final_hidden_state, final_cell_state) = self.lstm(input, (hidden_state, cell_state))\n", " output = output.permute(1, 0, 2) # output : [batch_size, len_seq, n_hidden]\n", " attn_output, attention = self.attention_net(output, final_hidden_state)\n", " return self.out(attn_output), attention # model : [batch_size, num_classes], attention : [batch_size, n_step]\n", "\n", "if __name__ == '__main__':\n", " embedding_dim = 2 # embedding size\n", " n_hidden = 5 # number of hidden units in one cell\n", " num_classes = 2 # 0 or 1\n", "\n", " # 3 words sentences (=sequence_length is 3)\n", " sentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \"i hate you\", \"sorry for that\", \"this is awful\"]\n", " labels = [1, 1, 1, 0, 0, 0] # 1 is good, 0 is not good.\n", "\n", " word_list = \" \".join(sentences).split()\n", " word_list = list(set(word_list))\n", " word_dict = {w: i for i, w in enumerate(word_list)}\n", " vocab_size = len(word_dict)\n", "\n", " model = BiLSTM_Attention()\n", "\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = optim.Adam(model.parameters(), lr=0.001)\n", "\n", " inputs = torch.LongTensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences])\n", " targets = torch.LongTensor([out for out in labels]) # To using Torch Softmax Loss function\n", "\n", " # Training\n", " for epoch in range(5000):\n", " optimizer.zero_grad()\n", " output, attention = model(inputs)\n", " loss = criterion(output, targets)\n", " if (epoch + 1) % 1000 == 0:\n", " print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n", "\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # Test\n", " test_text = 'sorry hate you'\n", " tests = [np.asarray([word_dict[n] for n in test_text.split()])]\n", " test_batch = 
torch.LongTensor(tests)\n", "\n", " # Predict\n", " predict, _ = model(test_batch)\n", " predict = predict.data.max(1, keepdim=True)[1]\n", " if predict[0][0] == 0:\n", " print(test_text,\"is Bad Mean...\")\n", " else:\n", " print(test_text,\"is Good Mean!!\")\n", "\n", " fig = plt.figure(figsize=(6, 3)) # [batch_size, n_step]\n", " ax = fig.add_subplot(1, 1, 1)\n", " ax.matshow(attention, cmap='viridis')\n", " ax.set_xticklabels(['']+['first_word', 'second_word', 'third_word'], fontdict={'fontsize': 14}, rotation=90)\n", " ax.set_yticklabels(['']+['batch_1', 'batch_2', 'batch_3', 'batch_4', 'batch_5', 'batch_6'], fontdict={'fontsize': 14})\n", " plt.show()" ], "outputs": [], "execution_count": null } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: 4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).py ================================================ # %% # code by Tae Hwan Jung(Jeff Jung) @graykode # Reference : https://github.com/prakashpandey9/Text-Classification-Pytorch/blob/master/models/LSTM_Attn.py import numpy as np import torch import torch.nn as nn import torch.optim as optim import torch.nn.functional as F import matplotlib.pyplot as plt class BiLSTM_Attention(nn.Module): def __init__(self): super(BiLSTM_Attention, self).__init__() self.embedding = nn.Embedding(vocab_size, embedding_dim) self.lstm = nn.LSTM(embedding_dim, n_hidden, bidirectional=True) self.out = nn.Linear(n_hidden * 2, num_classes) # lstm_output : [batch_size, n_step, n_hidden * num_directions(=2)], F matrix def attention_net(self, lstm_output, final_state): hidden = final_state.view(-1, n_hidden * 2, 
class BiLSTM_Attention(nn.Module):
    """Bidirectional LSTM classifier with attention over the LSTM outputs.

    Reads the module-level globals ``vocab_size``, ``embedding_dim``,
    ``n_hidden`` and ``num_classes``.
    """

    def __init__(self):
        super(BiLSTM_Attention, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, n_hidden, bidirectional=True)
        self.out = nn.Linear(n_hidden * 2, num_classes)

    # lstm_output : [batch_size, n_step, n_hidden * num_directions(=2)], F matrix
    def attention_net(self, lstm_output, final_state):
        """Attend over ``lstm_output`` using the final hidden state as the query.

        :param lstm_output: [batch_size, n_step, n_hidden * num_directions(=2)]
        :param final_state: [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        :return: (context of shape [batch_size, n_hidden * 2],
                  attention weights as a numpy array of shape [batch_size, n_step])
        """
        # final_state is direction-major. Move the batch axis to the front
        # before flattening so each row is [forward_b ; backward_b] of ONE
        # sample. A plain .view(-1, n_hidden * 2, 1) would splice together the
        # hidden states of different samples whenever batch_size > 1.
        hidden = final_state.permute(1, 0, 2).reshape(-1, n_hidden * 2, 1)  # [batch_size, n_hidden * 2, 1]
        attn_weights = torch.bmm(lstm_output, hidden).squeeze(2)  # attn_weights : [batch_size, n_step]
        soft_attn_weights = F.softmax(attn_weights, 1)
        # [batch_size, n_hidden * num_directions(=2), n_step] * [batch_size, n_step, 1] = [batch_size, n_hidden * num_directions(=2), 1]
        context = torch.bmm(lstm_output.transpose(1, 2), soft_attn_weights.unsqueeze(2)).squeeze(2)
        return context, soft_attn_weights.detach().numpy()  # context : [batch_size, n_hidden * num_directions(=2)]

    def forward(self, X):
        """Classify a batch of token-id sequences.

        :param X: LongTensor [batch_size, len_seq]
        :return: (logits [batch_size, num_classes], attention [batch_size, n_step] as numpy)
        """
        embedded = self.embedding(X)  # embedded : [batch_size, len_seq, embedding_dim]
        embedded = embedded.permute(1, 0, 2)  # embedded : [len_seq, batch_size, embedding_dim]

        # nn.LSTM starts from zero hidden/cell states when none are passed,
        # which is what the explicit torch.zeros tensors used to provide.
        # final_hidden_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        output, (final_hidden_state, _) = self.lstm(embedded)
        output = output.permute(1, 0, 2)  # output : [batch_size, len_seq, n_hidden * 2]
        attn_output, attention = self.attention_net(output, final_hidden_state)
        return self.out(attn_output), attention  # model : [batch_size, num_classes], attention : [batch_size, n_step]


if __name__ == '__main__':
    embedding_dim = 2  # embedding size
    n_hidden = 5  # number of hidden units in one cell
    num_classes = 2  # 0 or 1

    # 3 words sentences (=sequence_length is 3)
    sentences = ["i love you", "he loves me", "she likes baseball", "i hate you", "sorry for that", "this is awful"]
    labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.
word_list = " ".join(sentences).split() word_list = list(set(word_list)) word_dict = {w: i for i, w in enumerate(word_list)} vocab_size = len(word_dict) model = BiLSTM_Attention() criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.001) inputs = torch.LongTensor([np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences]) targets = torch.LongTensor([out for out in labels]) # To using Torch Softmax Loss function # Training for epoch in range(5000): optimizer.zero_grad() output, attention = model(inputs) loss = criterion(output, targets) if (epoch + 1) % 1000 == 0: print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss)) loss.backward() optimizer.step() # Test test_text = 'sorry hate you' tests = [np.asarray([word_dict[n] for n in test_text.split()])] test_batch = torch.LongTensor(tests) # Predict predict, _ = model(test_batch) predict = predict.data.max(1, keepdim=True)[1] if predict[0][0] == 0: print(test_text,"is Bad Mean...") else: print(test_text,"is Good Mean!!") fig = plt.figure(figsize=(6, 3)) # [batch_size, n_step] ax = fig.add_subplot(1, 1, 1) ax.matshow(attention, cmap='viridis') ax.set_xticklabels(['']+['first_word', 'second_word', 'third_word'], fontdict={'fontsize': 14}, rotation=90) ax.set_yticklabels(['']+['batch_1', 'batch_2', 'batch_3', 'batch_4', 'batch_5', 'batch_6'], fontdict={'fontsize': 14}) plt.show() ================================================ FILE: 5-1.Transformer/Transformer(Greedy_decoder).ipynb ================================================ { "cells": [ { "cell_type": "code", "metadata": {}, "source": [ "# code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612\n", "# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\n", "# https://github.com/JayParks/transformer\n", "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "import matplotlib.pyplot as plt\n", "\n", "# S: Symbol that shows 
starting of decoding input\n", "# E: Symbol that shows starting of decoding output\n", "# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\n", "\n", "def make_batch():\n", " input_batch = [[src_vocab[n] for n in sentences[0].split()]]\n", " output_batch = [[tgt_vocab[n] for n in sentences[1].split()]]\n", " target_batch = [[tgt_vocab[n] for n in sentences[2].split()]]\n", " return torch.LongTensor(input_batch), torch.LongTensor(output_batch), torch.LongTensor(target_batch)\n", "\n", "def get_sinusoid_encoding_table(n_position, d_model):\n", " def cal_angle(position, hid_idx):\n", " return position / np.power(10000, 2 * (hid_idx // 2) / d_model)\n", " def get_posi_angle_vec(position):\n", " return [cal_angle(position, hid_j) for hid_j in range(d_model)]\n", "\n", " sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)])\n", " sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i\n", " sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1\n", " return torch.FloatTensor(sinusoid_table)\n", "\n", "def get_attn_pad_mask(seq_q, seq_k):\n", " # print(seq_q)\n", " batch_size, len_q = seq_q.size()\n", " batch_size, len_k = seq_k.size()\n", " # eq(zero) is PAD token\n", " pad_attn_mask = seq_k.data.eq(0).unsqueeze(1) # batch_size x 1 x len_k(=len_q), one is masking\n", " return pad_attn_mask.expand(batch_size, len_q, len_k) # batch_size x len_q x len_k\n", "\n", "def get_attn_subsequent_mask(seq):\n", " attn_shape = [seq.size(0), seq.size(1), seq.size(1)]\n", " subsequent_mask = np.triu(np.ones(attn_shape), k=1)\n", " subsequent_mask = torch.from_numpy(subsequent_mask).byte()\n", " return subsequent_mask\n", "\n", "class ScaledDotProductAttention(nn.Module):\n", " def __init__(self):\n", " super(ScaledDotProductAttention, self).__init__()\n", "\n", " def forward(self, Q, K, V, attn_mask):\n", " scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : 
[batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n", " scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\n", " attn = nn.Softmax(dim=-1)(scores)\n", " context = torch.matmul(attn, V)\n", " return context, attn\n", "\n", "class MultiHeadAttention(nn.Module):\n", " def __init__(self):\n", " super(MultiHeadAttention, self).__init__()\n", " self.W_Q = nn.Linear(d_model, d_k * n_heads)\n", " self.W_K = nn.Linear(d_model, d_k * n_heads)\n", " self.W_V = nn.Linear(d_model, d_v * n_heads)\n", " self.linear = nn.Linear(n_heads * d_v, d_model)\n", " self.layer_norm = nn.LayerNorm(d_model)\n", "\n", " def forward(self, Q, K, V, attn_mask):\n", " # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\n", " residual, batch_size = Q, Q.size(0)\n", " # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\n", " q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2) # q_s: [batch_size x n_heads x len_q x d_k]\n", " k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2) # k_s: [batch_size x n_heads x len_k x d_k]\n", " v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2) # v_s: [batch_size x n_heads x len_k x d_v]\n", "\n", " attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k]\n", "\n", " # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n", " context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)\n", " context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\n", " output = self.linear(context)\n", " return self.layer_norm(output + residual), attn # output: [batch_size x len_q x d_model]\n", "\n", "class PoswiseFeedForwardNet(nn.Module):\n", " def __init__(self):\n", " super(PoswiseFeedForwardNet, 
self).__init__()\n", " self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)\n", " self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)\n", " self.layer_norm = nn.LayerNorm(d_model)\n", "\n", " def forward(self, inputs):\n", " residual = inputs # inputs : [batch_size, len_q, d_model]\n", " output = nn.ReLU()(self.conv1(inputs.transpose(1, 2)))\n", " output = self.conv2(output).transpose(1, 2)\n", " return self.layer_norm(output + residual)\n", "\n", "class EncoderLayer(nn.Module):\n", " def __init__(self):\n", " super(EncoderLayer, self).__init__()\n", " self.enc_self_attn = MultiHeadAttention()\n", " self.pos_ffn = PoswiseFeedForwardNet()\n", "\n", " def forward(self, enc_inputs, enc_self_attn_mask):\n", " enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\n", " enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\n", " return enc_outputs, attn\n", "\n", "class DecoderLayer(nn.Module):\n", " def __init__(self):\n", " super(DecoderLayer, self).__init__()\n", " self.dec_self_attn = MultiHeadAttention()\n", " self.dec_enc_attn = MultiHeadAttention()\n", " self.pos_ffn = PoswiseFeedForwardNet()\n", "\n", " def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):\n", " dec_outputs, dec_self_attn = self.dec_self_attn(dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask)\n", " dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)\n", " dec_outputs = self.pos_ffn(dec_outputs)\n", " return dec_outputs, dec_self_attn, dec_enc_attn\n", "\n", "class Encoder(nn.Module):\n", " def __init__(self):\n", " super(Encoder, self).__init__()\n", " self.src_emb = nn.Embedding(src_vocab_size, d_model)\n", " self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model),freeze=True)\n", " self.layers = nn.ModuleList([EncoderLayer() 
for _ in range(n_layers)])\n", "\n", " def forward(self, enc_inputs): # enc_inputs : [batch_size x source_len]\n", " enc_outputs = self.src_emb(enc_inputs) + self.pos_emb(torch.LongTensor([[1,2,3,4,0]]))\n", " enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)\n", " enc_self_attns = []\n", " for layer in self.layers:\n", " enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)\n", " enc_self_attns.append(enc_self_attn)\n", " return enc_outputs, enc_self_attns\n", "\n", "class Decoder(nn.Module):\n", " def __init__(self):\n", " super(Decoder, self).__init__()\n", " self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)\n", " self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model),freeze=True)\n", " self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])\n", "\n", " def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs : [batch_size x target_len]\n", " dec_outputs = self.tgt_emb(dec_inputs) + self.pos_emb(torch.LongTensor([[5,1,2,3,4]]))\n", " dec_self_attn_pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs)\n", " dec_self_attn_subsequent_mask = get_attn_subsequent_mask(dec_inputs)\n", " dec_self_attn_mask = torch.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)\n", "\n", " dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs)\n", "\n", " dec_self_attns, dec_enc_attns = [], []\n", " for layer in self.layers:\n", " dec_outputs, dec_self_attn, dec_enc_attn = layer(dec_outputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask)\n", " dec_self_attns.append(dec_self_attn)\n", " dec_enc_attns.append(dec_enc_attn)\n", " return dec_outputs, dec_self_attns, dec_enc_attns\n", "\n", "class Transformer(nn.Module):\n", " def __init__(self):\n", " super(Transformer, self).__init__()\n", " self.encoder = Encoder()\n", " self.decoder = Decoder()\n", " self.projection = nn.Linear(d_model, tgt_vocab_size, bias=False)\n", " def forward(self, enc_inputs, dec_inputs):\n", " 
enc_outputs, enc_self_attns = self.encoder(enc_inputs)\n", " dec_outputs, dec_self_attns, dec_enc_attns = self.decoder(dec_inputs, enc_inputs, enc_outputs)\n", " dec_logits = self.projection(dec_outputs) # dec_logits : [batch_size x src_vocab_size x tgt_vocab_size]\n", " return dec_logits.view(-1, dec_logits.size(-1)), enc_self_attns, dec_self_attns, dec_enc_attns\n", "\n", "def greedy_decoder(model, enc_input, start_symbol):\n", " \"\"\"\n", " For simplicity, a Greedy Decoder is Beam search when K=1. This is necessary for inference as we don't know the\n", " target sequence input. Therefore we try to generate the target input word by word, then feed it into the transformer.\n", " Starting Reference: http://nlp.seas.harvard.edu/2018/04/03/attention.html#greedy-decoding\n", " :param model: Transformer Model\n", " :param enc_input: The encoder input\n", " :param start_symbol: The start symbol. In this example it is 'S' which corresponds to index 4\n", " :return: The target input\n", " \"\"\"\n", " enc_outputs, enc_self_attns = model.encoder(enc_input)\n", " dec_input = torch.zeros(1, 5).type_as(enc_input.data)\n", " next_symbol = start_symbol\n", " for i in range(0, 5):\n", " dec_input[0][i] = next_symbol\n", " dec_outputs, _, _ = model.decoder(dec_input, enc_input, enc_outputs)\n", " projected = model.projection(dec_outputs)\n", " prob = projected.squeeze(0).max(dim=-1, keepdim=False)[1]\n", " next_word = prob.data[i]\n", " next_symbol = next_word.item()\n", " return dec_input\n", "\n", "def showgraph(attn):\n", " attn = attn[-1].squeeze(0)[0]\n", " attn = attn.squeeze(0).data.numpy()\n", " fig = plt.figure(figsize=(n_heads, n_heads)) # [n_heads, n_heads]\n", " ax = fig.add_subplot(1, 1, 1)\n", " ax.matshow(attn, cmap='viridis')\n", " ax.set_xticklabels(['']+sentences[0].split(), fontdict={'fontsize': 14}, rotation=90)\n", " ax.set_yticklabels(['']+sentences[2].split(), fontdict={'fontsize': 14})\n", " plt.show()\n", "\n", "if __name__ == '__main__':\n", " sentences 
= ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\n", " # Transformer Parameters\n", " # Padding Should be Zero index\n", " src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4}\n", " src_vocab_size = len(src_vocab)\n", "\n", " tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6}\n", " number_dict = {i: w for i, w in enumerate(tgt_vocab)}\n", " tgt_vocab_size = len(tgt_vocab)\n", "\n", " src_len = 5 # length of source\n", " tgt_len = 5 # length of target\n", "\n", " d_model = 512 # Embedding Size\n", " d_ff = 2048 # FeedForward dimension\n", " d_k = d_v = 64 # dimension of K(=Q), V\n", " n_layers = 6 # number of Encoder of Decoder Layer\n", " n_heads = 8 # number of heads in Multi-Head Attention\n", "\n", " model = Transformer()\n", "\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = optim.Adam(model.parameters(), lr=0.001)\n", "\n", " enc_inputs, dec_inputs, target_batch = make_batch()\n", "\n", " for epoch in range(20):\n", " optimizer.zero_grad()\n", " outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)\n", " loss = criterion(outputs, target_batch.contiguous().view(-1))\n", " print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # Test\n", " greedy_dec_input = greedy_decoder(model, enc_inputs, start_symbol=tgt_vocab[\"S\"])\n", " predict, _, _, _ = model(enc_inputs, greedy_dec_input)\n", " predict = predict.data.max(1, keepdim=True)[1]\n", " print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\n", "\n", " print('first head of last state enc_self_attns')\n", " showgraph(enc_self_attns)\n", "\n", " print('first head of last state dec_self_attns')\n", " showgraph(dec_self_attns)\n", "\n", " print('first head of last state dec_enc_attns')\n", " showgraph(dec_enc_attns)" ], "outputs": [], "execution_count": null } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { 
"display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: 5-1.Transformer/Transformer(Greedy_decoder).py ================================================ # %% # code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612 # Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch # https://github.com/JayParks/transformer import numpy as np import torch import torch.nn as nn import torch.optim as optim import matplotlib.pyplot as plt # S: Symbol that shows starting of decoding input # E: Symbol that shows starting of decoding output # P: Symbol that will fill in blank sequence if current batch data size is short than time steps def make_batch(): input_batch = [[src_vocab[n] for n in sentences[0].split()]] output_batch = [[tgt_vocab[n] for n in sentences[1].split()]] target_batch = [[tgt_vocab[n] for n in sentences[2].split()]] return torch.LongTensor(input_batch), torch.LongTensor(output_batch), torch.LongTensor(target_batch) def get_sinusoid_encoding_table(n_position, d_model): def cal_angle(position, hid_idx): return position / np.power(10000, 2 * (hid_idx // 2) / d_model) def get_posi_angle_vec(position): return [cal_angle(position, hid_j) for hid_j in range(d_model)] sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)]) sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1 return torch.FloatTensor(sinusoid_table) def get_attn_pad_mask(seq_q, seq_k): # print(seq_q) batch_size, len_q = seq_q.size() batch_size, len_k = seq_k.size() # eq(zero) is PAD token pad_attn_mask = 
def get_attn_subsequent_mask(seq):
    """Return the causal ("no peeking ahead") mask for self-attention.

    :param seq: 2-D tensor ``[batch_size x seq_len]``; only its shape and
        device are used
    :return: uint8 tensor ``[batch_size x seq_len x seq_len]`` where entry
        ``[b, i, j]`` is 1 when ``j > i`` (position j is in the future of i
        and must be masked) and 0 otherwise
    """
    batch_size, seq_len = seq.size(0), seq.size(1)
    # Build the strictly-upper-triangular pattern directly in torch and on the
    # input's device, instead of a CPU float64 numpy array of the full shape.
    index = torch.arange(seq_len, device=seq.device)
    future = (index.unsqueeze(0) > index.unsqueeze(1)).to(torch.uint8)
    # Materialise one copy per batch element, as callers receive a plain
    # [batch_size x seq_len x seq_len] tensor.
    return future.unsqueeze(0).repeat(batch_size, 1, 1)
class PoswiseFeedForwardNet(nn.Module):
    """Position-wise feed-forward sub-layer with residual connection and layer norm.

    The two projections are kernel-size-1 ``Conv1d`` layers
    (``d_model -> d_ff -> d_model``); the widths come from the module-level
    ``d_model`` and ``d_ff``.
    """

    def __init__(self):
        super().__init__()
        # Creation order (conv1, conv2, layer_norm) is kept so parameter
        # initialisation under a fixed seed is unchanged.
        self.conv1 = nn.Conv1d(d_model, d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(d_ff, d_model, kernel_size=1)
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, inputs):
        """
        :param inputs: [batch_size, len_q, d_model]
        :return: tensor of the same shape as ``inputs``
        """
        # Conv1d expects channels before length, so swap the last two axes.
        channels_first = inputs.transpose(1, 2)
        hidden = torch.relu(self.conv1(channels_first))
        projected = self.conv2(hidden).transpose(1, 2)
        return self.layer_norm(projected + inputs)
class Decoder(nn.Module):
    """Stack of ``n_layers`` decoder layers over embedded target tokens.

    Relies on the module-level ``tgt_vocab_size``, ``d_model``, ``tgt_len``
    and ``n_layers`` being defined before instantiation.
    """

    def __init__(self):
        super(Decoder, self).__init__()
        self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)
        # Frozen sinusoid table with tgt_len + 1 rows: row 0 is used for
        # padding, rows 1..tgt_len for real positions.
        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model), freeze=True)
        self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])

    @staticmethod
    def _position_ids(token_ids, pad_idx=0):
        """Return 1-based position ids for ``token_ids``, with 0 at padding.

        :param token_ids: LongTensor ``[batch_size x seq_len]``
        :param pad_idx: token id that marks padding
        :return: LongTensor of the same shape as ``token_ids``
        """
        steps = torch.arange(1, token_ids.size(1) + 1, dtype=torch.long, device=token_ids.device)
        return steps.unsqueeze(0) * token_ids.ne(pad_idx).long()

    def forward(self, dec_inputs, enc_inputs, enc_outputs):
        """
        :param dec_inputs: [batch_size x target_len] target token ids
        :param enc_inputs: [batch_size x source_len] source token ids (used
            only to build the padding mask for encoder-decoder attention)
        :param enc_outputs: encoder output passed to every decoder layer
        :return: ``(dec_outputs, dec_self_attns, dec_enc_attns)``; the two
            lists hold one attention tensor per layer
        """
        # Positions are derived from the input rather than hard-coded, so the
        # decoder is not tied to one 5-token sentence. Sequences longer than
        # tgt_len index past the position table and raise in the embedding.
        dec_outputs = self.tgt_emb(dec_inputs) + self.pos_emb(self._position_ids(dec_inputs))
        dec_self_attn_pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs)
        dec_self_attn_subsequent_mask = get_attn_subsequent_mask(dec_inputs)
        # A position is masked if it is padding OR lies in the future.
        dec_self_attn_mask = torch.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)

        dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs)

        dec_self_attns, dec_enc_attns = [], []
        for layer in self.layers:
            dec_outputs, dec_self_attn, dec_enc_attn = layer(dec_outputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask)
            dec_self_attns.append(dec_self_attn)
            dec_enc_attns.append(dec_enc_attn)
        return dec_outputs, dec_self_attns, dec_enc_attns
def greedy_decoder(model, enc_input, start_symbol, max_len=5):
    """
    For simplicity, a Greedy Decoder is Beam search when K=1. This is necessary for inference as we don't know the
    target sequence input. Therefore we try to generate the target input word by word, then feed it into the transformer.
    Starting Reference: http://nlp.seas.harvard.edu/2018/04/03/attention.html#greedy-decoding
    :param model: Transformer Model (must expose ``encoder``, ``decoder`` and ``projection``)
    :param enc_input: The encoder input; a single sequence, i.e. batch size 1
    :param start_symbol: The start symbol. In this example it is ``tgt_vocab['S']``
    :param max_len: Number of target positions to generate. Defaults to 5, the target length used in this
        example; the decoder must accept inputs of this length.
    :return: The target input, a ``[1 x max_len]`` tensor with the dtype of ``enc_input``
    """
    # Inference only: no autograd graph is needed for the repeated decoder passes.
    with torch.no_grad():
        enc_outputs, _ = model.encoder(enc_input)
        # Unfilled slots stay 0, which is the padding index in this tutorial.
        dec_input = torch.zeros(1, max_len, dtype=enc_input.dtype, device=enc_input.device)
        next_symbol = start_symbol
        for i in range(max_len):
            dec_input[0][i] = next_symbol
            dec_outputs, _, _ = model.decoder(dec_input, enc_input, enc_outputs)
            projected = model.projection(dec_outputs)
            # Most likely token at every position; only position i feeds the next step.
            best_tokens = projected.squeeze(0).max(dim=-1, keepdim=False)[1]
            next_symbol = best_tokens[i].item()
    return dec_input
'a': 3, 'beer': 4, 'S': 5, 'E': 6} number_dict = {i: w for i, w in enumerate(tgt_vocab)} tgt_vocab_size = len(tgt_vocab) src_len = 5 # length of source tgt_len = 5 # length of target d_model = 512 # Embedding Size d_ff = 2048 # FeedForward dimension d_k = d_v = 64 # dimension of K(=Q), V n_layers = 6 # number of Encoder of Decoder Layer n_heads = 8 # number of heads in Multi-Head Attention model = Transformer() criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.001) enc_inputs, dec_inputs, target_batch = make_batch() for epoch in range(20): optimizer.zero_grad() outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs) loss = criterion(outputs, target_batch.contiguous().view(-1)) print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss)) loss.backward() optimizer.step() # Test greedy_dec_input = greedy_decoder(model, enc_inputs, start_symbol=tgt_vocab["S"]) predict, _, _, _ = model(enc_inputs, greedy_dec_input) predict = predict.data.max(1, keepdim=True)[1] print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()]) print('first head of last state enc_self_attns') showgraph(enc_self_attns) print('first head of last state dec_self_attns') showgraph(dec_self_attns) print('first head of last state dec_enc_attns') showgraph(dec_enc_attns) ================================================ FILE: 5-1.Transformer/Transformer.ipynb ================================================ { "cells": [ { "cell_type": "code", "metadata": {}, "source": [ "# code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612\n", "# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\n", "# https://github.com/JayParks/transformer\n", "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "import matplotlib.pyplot as plt\n", "\n", "# S: Symbol that shows starting of decoding input\n", "# E: Symbol that shows starting of 
decoding output\n", "# P: Symbol that will fill in blank sequence if current batch data size is short than time steps\n", "\n", "def make_batch(sentences):\n", " input_batch = [[src_vocab[n] for n in sentences[0].split()]]\n", " output_batch = [[tgt_vocab[n] for n in sentences[1].split()]]\n", " target_batch = [[tgt_vocab[n] for n in sentences[2].split()]]\n", " return torch.LongTensor(input_batch), torch.LongTensor(output_batch), torch.LongTensor(target_batch)\n", "\n", "def get_sinusoid_encoding_table(n_position, d_model):\n", " def cal_angle(position, hid_idx):\n", " return position / np.power(10000, 2 * (hid_idx // 2) / d_model)\n", " def get_posi_angle_vec(position):\n", " return [cal_angle(position, hid_j) for hid_j in range(d_model)]\n", "\n", " sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)])\n", " sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i\n", " sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1\n", " return torch.FloatTensor(sinusoid_table)\n", "\n", "def get_attn_pad_mask(seq_q, seq_k):\n", " batch_size, len_q = seq_q.size()\n", " batch_size, len_k = seq_k.size()\n", " # eq(zero) is PAD token\n", " pad_attn_mask = seq_k.data.eq(0).unsqueeze(1) # batch_size x 1 x len_k(=len_q), one is masking\n", " return pad_attn_mask.expand(batch_size, len_q, len_k) # batch_size x len_q x len_k\n", "\n", "def get_attn_subsequent_mask(seq):\n", " attn_shape = [seq.size(0), seq.size(1), seq.size(1)]\n", " subsequent_mask = np.triu(np.ones(attn_shape), k=1)\n", " subsequent_mask = torch.from_numpy(subsequent_mask).byte()\n", " return subsequent_mask\n", "\n", "class ScaledDotProductAttention(nn.Module):\n", " def __init__(self):\n", " super(ScaledDotProductAttention, self).__init__()\n", "\n", " def forward(self, Q, K, V, attn_mask):\n", " scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n", " 
scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\n", " attn = nn.Softmax(dim=-1)(scores)\n", " context = torch.matmul(attn, V)\n", " return context, attn\n", "\n", "class MultiHeadAttention(nn.Module):\n", " def __init__(self):\n", " super(MultiHeadAttention, self).__init__()\n", " self.W_Q = nn.Linear(d_model, d_k * n_heads)\n", " self.W_K = nn.Linear(d_model, d_k * n_heads)\n", " self.W_V = nn.Linear(d_model, d_v * n_heads)\n", " self.linear = nn.Linear(n_heads * d_v, d_model)\n", " self.layer_norm = nn.LayerNorm(d_model)\n", "\n", " def forward(self, Q, K, V, attn_mask):\n", " # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\n", " residual, batch_size = Q, Q.size(0)\n", " # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\n", " q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2) # q_s: [batch_size x n_heads x len_q x d_k]\n", " k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2) # k_s: [batch_size x n_heads x len_k x d_k]\n", " v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2) # v_s: [batch_size x n_heads x len_k x d_v]\n", "\n", " attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k]\n", "\n", " # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n", " context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)\n", " context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\n", " output = self.linear(context)\n", " return self.layer_norm(output + residual), attn # output: [batch_size x len_q x d_model]\n", "\n", "class PoswiseFeedForwardNet(nn.Module):\n", " def __init__(self):\n", " super(PoswiseFeedForwardNet, self).__init__()\n", " self.conv1 = nn.Conv1d(in_channels=d_model, 
out_channels=d_ff, kernel_size=1)\n", " self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)\n", " self.layer_norm = nn.LayerNorm(d_model)\n", "\n", " def forward(self, inputs):\n", " residual = inputs # inputs : [batch_size, len_q, d_model]\n", " output = nn.ReLU()(self.conv1(inputs.transpose(1, 2)))\n", " output = self.conv2(output).transpose(1, 2)\n", " return self.layer_norm(output + residual)\n", "\n", "class EncoderLayer(nn.Module):\n", " def __init__(self):\n", " super(EncoderLayer, self).__init__()\n", " self.enc_self_attn = MultiHeadAttention()\n", " self.pos_ffn = PoswiseFeedForwardNet()\n", "\n", " def forward(self, enc_inputs, enc_self_attn_mask):\n", " enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\n", " enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\n", " return enc_outputs, attn\n", "\n", "class DecoderLayer(nn.Module):\n", " def __init__(self):\n", " super(DecoderLayer, self).__init__()\n", " self.dec_self_attn = MultiHeadAttention()\n", " self.dec_enc_attn = MultiHeadAttention()\n", " self.pos_ffn = PoswiseFeedForwardNet()\n", "\n", " def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):\n", " dec_outputs, dec_self_attn = self.dec_self_attn(dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask)\n", " dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)\n", " dec_outputs = self.pos_ffn(dec_outputs)\n", " return dec_outputs, dec_self_attn, dec_enc_attn\n", "\n", "class Encoder(nn.Module):\n", " def __init__(self):\n", " super(Encoder, self).__init__()\n", " self.src_emb = nn.Embedding(src_vocab_size, d_model)\n", " self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model),freeze=True)\n", " self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\n", "\n", " def forward(self, 
enc_inputs): # enc_inputs : [batch_size x source_len]\n", " enc_outputs = self.src_emb(enc_inputs) + self.pos_emb(torch.LongTensor([[1,2,3,4,0]]))\n", " enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)\n", " enc_self_attns = []\n", " for layer in self.layers:\n", " enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)\n", " enc_self_attns.append(enc_self_attn)\n", " return enc_outputs, enc_self_attns\n", "\n", "class Decoder(nn.Module):\n", " def __init__(self):\n", " super(Decoder, self).__init__()\n", " self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)\n", " self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(tgt_len+1, d_model),freeze=True)\n", " self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])\n", "\n", " def forward(self, dec_inputs, enc_inputs, enc_outputs): # dec_inputs : [batch_size x target_len]\n", " dec_outputs = self.tgt_emb(dec_inputs) + self.pos_emb(torch.LongTensor([[5,1,2,3,4]]))\n", " dec_self_attn_pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs)\n", " dec_self_attn_subsequent_mask = get_attn_subsequent_mask(dec_inputs)\n", " dec_self_attn_mask = torch.gt((dec_self_attn_pad_mask + dec_self_attn_subsequent_mask), 0)\n", "\n", " dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs)\n", "\n", " dec_self_attns, dec_enc_attns = [], []\n", " for layer in self.layers:\n", " dec_outputs, dec_self_attn, dec_enc_attn = layer(dec_outputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask)\n", " dec_self_attns.append(dec_self_attn)\n", " dec_enc_attns.append(dec_enc_attn)\n", " return dec_outputs, dec_self_attns, dec_enc_attns\n", "\n", "class Transformer(nn.Module):\n", " def __init__(self):\n", " super(Transformer, self).__init__()\n", " self.encoder = Encoder()\n", " self.decoder = Decoder()\n", " self.projection = nn.Linear(d_model, tgt_vocab_size, bias=False)\n", " def forward(self, enc_inputs, dec_inputs):\n", " enc_outputs, enc_self_attns = self.encoder(enc_inputs)\n", 
" dec_outputs, dec_self_attns, dec_enc_attns = self.decoder(dec_inputs, enc_inputs, enc_outputs)\n", " dec_logits = self.projection(dec_outputs) # dec_logits : [batch_size x src_vocab_size x tgt_vocab_size]\n", " return dec_logits.view(-1, dec_logits.size(-1)), enc_self_attns, dec_self_attns, dec_enc_attns\n", "\n", "def showgraph(attn):\n", " attn = attn[-1].squeeze(0)[0]\n", " attn = attn.squeeze(0).data.numpy()\n", " fig = plt.figure(figsize=(n_heads, n_heads)) # [n_heads, n_heads]\n", " ax = fig.add_subplot(1, 1, 1)\n", " ax.matshow(attn, cmap='viridis')\n", " ax.set_xticklabels(['']+sentences[0].split(), fontdict={'fontsize': 14}, rotation=90)\n", " ax.set_yticklabels(['']+sentences[2].split(), fontdict={'fontsize': 14})\n", " plt.show()\n", "\n", "if __name__ == '__main__':\n", " sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']\n", "\n", " # Transformer Parameters\n", " # Padding Should be Zero\n", " src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4}\n", " src_vocab_size = len(src_vocab)\n", "\n", " tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6}\n", " number_dict = {i: w for i, w in enumerate(tgt_vocab)}\n", " tgt_vocab_size = len(tgt_vocab)\n", "\n", " src_len = 5 # length of source\n", " tgt_len = 5 # length of target\n", "\n", " d_model = 512 # Embedding Size\n", " d_ff = 2048 # FeedForward dimension\n", " d_k = d_v = 64 # dimension of K(=Q), V\n", " n_layers = 6 # number of Encoder of Decoder Layer\n", " n_heads = 8 # number of heads in Multi-Head Attention\n", "\n", " model = Transformer()\n", "\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = optim.Adam(model.parameters(), lr=0.001)\n", "\n", " enc_inputs, dec_inputs, target_batch = make_batch(sentences)\n", "\n", " for epoch in range(20):\n", " optimizer.zero_grad()\n", " outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)\n", " loss = criterion(outputs, 
target_batch.contiguous().view(-1))\n", " print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # Test\n", " predict, _, _, _ = model(enc_inputs, dec_inputs)\n", " predict = predict.data.max(1, keepdim=True)[1]\n", " print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()])\n", "\n", " print('first head of last state enc_self_attns')\n", " showgraph(enc_self_attns)\n", "\n", " print('first head of last state dec_self_attns')\n", " showgraph(dec_self_attns)\n", "\n", " print('first head of last state dec_enc_attns')\n", " showgraph(dec_enc_attns)" ], "outputs": [], "execution_count": null } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: 5-1.Transformer/Transformer.py ================================================ # %% # code by Tae Hwan Jung(Jeff Jung) @graykode, Derek Miller @dmmiller612 # Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch # https://github.com/JayParks/transformer import numpy as np import torch import torch.nn as nn import torch.optim as optim import matplotlib.pyplot as plt # S: Symbol that shows starting of decoding input # E: Symbol that shows starting of decoding output # P: Symbol that will fill in blank sequence if current batch data size is short than time steps def make_batch(sentences): input_batch = [[src_vocab[n] for n in sentences[0].split()]] output_batch = [[tgt_vocab[n] for n in sentences[1].split()]] target_batch = [[tgt_vocab[n] for n in sentences[2].split()]] return torch.LongTensor(input_batch), 
def get_attn_pad_mask(seq_q, seq_k, pad_idx=0):
    """Build the attention mask that hides padding tokens on the key side.

    :param seq_q: query token ids ``[batch_size x len_q]``; only its length is used
    :param seq_k: key token ids ``[batch_size x len_k]``
    :param pad_idx: token id that marks padding (0 in this tutorial's vocabularies)
    :return: mask ``[batch_size x len_q x len_k]``, true where the key token is padding
    """
    len_q = seq_q.size(1)
    batch_size, len_k = seq_k.size()
    # batch_size x 1 x len_k: one row of "is padding" flags per sequence
    pad_attn_mask = seq_k.eq(pad_idx).unsqueeze(1)
    # Every query position shares the same key-side flags.
    return pad_attn_mask.expand(batch_size, len_q, len_k)
class MultiHeadAttention(nn.Module):
    """Multi-head attention followed by a residual connection and layer norm.

    Uses the module-level hyper-parameters ``d_model``, ``d_k``, ``d_v`` and
    ``n_heads``.
    """

    def __init__(self):
        super().__init__()
        # Creation order is kept so parameter initialisation under a fixed
        # seed is unchanged.
        self.W_Q = nn.Linear(d_model, d_k * n_heads)
        self.W_K = nn.Linear(d_model, d_k * n_heads)
        self.W_V = nn.Linear(d_model, d_v * n_heads)
        self.linear = nn.Linear(n_heads * d_v, d_model)
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, Q, K, V, attn_mask):
        """
        :param Q: [batch_size x len_q x d_model]
        :param K: [batch_size x len_k x d_model]
        :param V: [batch_size x len_k x d_model]
        :param attn_mask: [batch_size x len_q x len_k], true where attention is blocked
        :return: ``(output, attn)`` with output [batch_size x len_q x d_model]
            and attn [batch_size x n_heads x len_q x len_k]
        """
        n_batch = Q.size(0)

        def split_heads(projected, head_dim):
            # [batch, len, heads * dim] -> [batch, heads, len, dim]
            return projected.view(n_batch, -1, n_heads, head_dim).transpose(1, 2)

        queries = split_heads(self.W_Q(Q), d_k)
        keys = split_heads(self.W_K(K), d_k)
        values = split_heads(self.W_V(V), d_v)

        # The same mask applies to every head: [batch, heads, len_q, len_k]
        per_head_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1)

        attention = ScaledDotProductAttention()
        context, attn = attention(queries, keys, values, per_head_mask)
        # Merge the heads back: [batch, len_q, heads * d_v]
        merged = context.transpose(1, 2).contiguous().view(n_batch, -1, n_heads * d_v)
        return self.layer_norm(self.linear(merged) + Q), attn
class Encoder(nn.Module):
    """Stack of ``n_layers`` encoder layers over embedded source tokens.

    Relies on the module-level ``src_vocab_size``, ``d_model``, ``src_len``
    and ``n_layers`` being defined before instantiation.
    """

    def __init__(self):
        super(Encoder, self).__init__()
        self.src_emb = nn.Embedding(src_vocab_size, d_model)
        # Frozen sinusoid table with src_len + 1 rows: row 0 is used for
        # padding, rows 1..src_len for real positions.
        self.pos_emb = nn.Embedding.from_pretrained(get_sinusoid_encoding_table(src_len+1, d_model), freeze=True)
        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])

    @staticmethod
    def _position_ids(token_ids, pad_idx=0):
        """Return 1-based position ids for ``token_ids``, with 0 at padding.

        :param token_ids: LongTensor ``[batch_size x seq_len]``
        :param pad_idx: token id that marks padding
        :return: LongTensor of the same shape as ``token_ids``
        """
        steps = torch.arange(1, token_ids.size(1) + 1, dtype=torch.long, device=token_ids.device)
        return steps.unsqueeze(0) * token_ids.ne(pad_idx).long()

    def forward(self, enc_inputs):
        """
        :param enc_inputs: [batch_size x source_len] source token ids
        :return: ``(enc_outputs, enc_self_attns)``; the list holds one
            attention tensor per layer
        """
        # Computed from the input instead of the former hard-coded
        # [[1,2,3,4,0]]; for the tutorial sentence the result is identical.
        # Sequences longer than src_len index past the position table and
        # raise in the embedding.
        enc_outputs = self.src_emb(enc_inputs) + self.pos_emb(self._position_ids(enc_inputs))
        enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)
        enc_self_attns = []
        for layer in self.layers:
            enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)
            enc_self_attns.append(enc_self_attn)
        return enc_outputs, enc_self_attns
class Transformer(nn.Module):
    """Encoder-decoder model with a bias-free projection onto the target vocabulary."""

    def __init__(self):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()
        self.projection = nn.Linear(d_model, tgt_vocab_size, bias=False)

    def forward(self, enc_inputs, dec_inputs):
        """
        :param enc_inputs: source token ids, passed to the encoder
        :param dec_inputs: target token ids, passed to the decoder
        :return: ``(logits, enc_self_attns, dec_self_attns, dec_enc_attns)``
            where ``logits`` is flattened to two dimensions, the last being
            the projection's output size, and the attention values are
            returned exactly as the encoder/decoder produced them
        """
        memory, enc_self_attns = self.encoder(enc_inputs)
        decoded, dec_self_attns, dec_enc_attns = self.decoder(dec_inputs, enc_inputs, memory)
        logits = self.projection(decoded)
        # Collapse all leading axes so the result can be fed straight to
        # CrossEntropyLoss against a flattened target.
        flat_logits = logits.view(-1, logits.size(-1))
        return flat_logits, enc_self_attns, dec_self_attns, dec_enc_attns
'i want a beer E'] # Transformer Parameters # Padding Should be Zero src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4} src_vocab_size = len(src_vocab) tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6} number_dict = {i: w for i, w in enumerate(tgt_vocab)} tgt_vocab_size = len(tgt_vocab) src_len = 5 # length of source tgt_len = 5 # length of target d_model = 512 # Embedding Size d_ff = 2048 # FeedForward dimension d_k = d_v = 64 # dimension of K(=Q), V n_layers = 6 # number of Encoder of Decoder Layer n_heads = 8 # number of heads in Multi-Head Attention model = Transformer() criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.001) enc_inputs, dec_inputs, target_batch = make_batch(sentences) for epoch in range(20): optimizer.zero_grad() outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs) loss = criterion(outputs, target_batch.contiguous().view(-1)) print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss)) loss.backward() optimizer.step() # Test predict, _, _, _ = model(enc_inputs, dec_inputs) predict = predict.data.max(1, keepdim=True)[1] print(sentences[0], '->', [number_dict[n.item()] for n in predict.squeeze()]) print('first head of last state enc_self_attns') showgraph(enc_self_attns) print('first head of last state dec_self_attns') showgraph(dec_self_attns) print('first head of last state dec_enc_attns') showgraph(dec_enc_attns) ================================================ FILE: 5-2.BERT/BERT.ipynb ================================================ { "cells": [ { "cell_type": "code", "metadata": {}, "source": [ "# code by Tae Hwan Jung(Jeff Jung) @graykode\n", "# Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch\n", "# https://github.com/JayParks/transformer, https://github.com/dhlee347/pytorchic-bert\n", "import math\n", "import re\n", "from random import *\n", "import numpy as np\n", "import torch\n", "import 
torch.nn as nn\n", "import torch.optim as optim\n", "\n", "# sample IsNext and NotNext to be same in small batch size\n", "def make_batch():\n", " batch = []\n", " positive = negative = 0\n", " while positive != batch_size/2 or negative != batch_size/2:\n", " tokens_a_index, tokens_b_index= randrange(len(sentences)), randrange(len(sentences)) # sample random index in sentences\n", " tokens_a, tokens_b= token_list[tokens_a_index], token_list[tokens_b_index]\n", " input_ids = [word_dict['[CLS]']] + tokens_a + [word_dict['[SEP]']] + tokens_b + [word_dict['[SEP]']]\n", " segment_ids = [0] * (1 + len(tokens_a) + 1) + [1] * (len(tokens_b) + 1)\n", "\n", " # MASK LM\n", " n_pred = min(max_pred, max(1, int(round(len(input_ids) * 0.15)))) # 15 % of tokens in one sentence\n", " cand_maked_pos = [i for i, token in enumerate(input_ids)\n", " if token != word_dict['[CLS]'] and token != word_dict['[SEP]']]\n", " shuffle(cand_maked_pos)\n", " masked_tokens, masked_pos = [], []\n", " for pos in cand_maked_pos[:n_pred]:\n", " masked_pos.append(pos)\n", " masked_tokens.append(input_ids[pos])\n", " if random() < 0.8: # 80%\n", " input_ids[pos] = word_dict['[MASK]'] # make mask\n", " elif random() < 0.5: # 10%\n", " index = randint(0, vocab_size - 1) # random index in vocabulary\n", " input_ids[pos] = word_dict[number_dict[index]] # replace\n", "\n", " # Zero Paddings\n", " n_pad = maxlen - len(input_ids)\n", " input_ids.extend([0] * n_pad)\n", " segment_ids.extend([0] * n_pad)\n", "\n", " # Zero Padding (100% - 15%) tokens\n", " if max_pred > n_pred:\n", " n_pad = max_pred - n_pred\n", " masked_tokens.extend([0] * n_pad)\n", " masked_pos.extend([0] * n_pad)\n", "\n", " if tokens_a_index + 1 == tokens_b_index and positive < batch_size/2:\n", " batch.append([input_ids, segment_ids, masked_tokens, masked_pos, True]) # IsNext\n", " positive += 1\n", " elif tokens_a_index + 1 != tokens_b_index and negative < batch_size/2:\n", " batch.append([input_ids, segment_ids, masked_tokens, 
masked_pos, False]) # NotNext\n", " negative += 1\n", " return batch\n", "# Proprecessing Finished\n", "\n", "def get_attn_pad_mask(seq_q, seq_k):\n", " batch_size, len_q = seq_q.size()\n", " batch_size, len_k = seq_k.size()\n", " # eq(zero) is PAD token\n", " pad_attn_mask = seq_k.data.eq(0).unsqueeze(1) # batch_size x 1 x len_k(=len_q), one is masking\n", " return pad_attn_mask.expand(batch_size, len_q, len_k) # batch_size x len_q x len_k\n", "\n", "def gelu(x):\n", " \"Implementation of the gelu activation function by Hugging Face\"\n", " return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))\n", "\n", "class Embedding(nn.Module):\n", " def __init__(self):\n", " super(Embedding, self).__init__()\n", " self.tok_embed = nn.Embedding(vocab_size, d_model) # token embedding\n", " self.pos_embed = nn.Embedding(maxlen, d_model) # position embedding\n", " self.seg_embed = nn.Embedding(n_segments, d_model) # segment(token type) embedding\n", " self.norm = nn.LayerNorm(d_model)\n", "\n", " def forward(self, x, seg):\n", " seq_len = x.size(1)\n", " pos = torch.arange(seq_len, dtype=torch.long)\n", " pos = pos.unsqueeze(0).expand_as(x) # (seq_len,) -> (batch_size, seq_len)\n", " embedding = self.tok_embed(x) + self.pos_embed(pos) + self.seg_embed(seg)\n", " return self.norm(embedding)\n", "\n", "class ScaledDotProductAttention(nn.Module):\n", " def __init__(self):\n", " super(ScaledDotProductAttention, self).__init__()\n", "\n", " def forward(self, Q, K, V, attn_mask):\n", " scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n", " scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one.\n", " attn = nn.Softmax(dim=-1)(scores)\n", " context = torch.matmul(attn, V)\n", " return context, attn\n", "\n", "class MultiHeadAttention(nn.Module):\n", " def __init__(self):\n", " super(MultiHeadAttention, self).__init__()\n", " self.W_Q = nn.Linear(d_model, d_k * 
n_heads)\n", " self.W_K = nn.Linear(d_model, d_k * n_heads)\n", " self.W_V = nn.Linear(d_model, d_v * n_heads)\n", " def forward(self, Q, K, V, attn_mask):\n", " # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]\n", " residual, batch_size = Q, Q.size(0)\n", " # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)\n", " q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2) # q_s: [batch_size x n_heads x len_q x d_k]\n", " k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2) # k_s: [batch_size x n_heads x len_k x d_k]\n", " v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2) # v_s: [batch_size x n_heads x len_k x d_v]\n", "\n", " attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k]\n", "\n", " # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]\n", " context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)\n", " context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v]\n", " output = nn.Linear(n_heads * d_v, d_model)(context)\n", " return nn.LayerNorm(d_model)(output + residual), attn # output: [batch_size x len_q x d_model]\n", "\n", "class PoswiseFeedForwardNet(nn.Module):\n", " def __init__(self):\n", " super(PoswiseFeedForwardNet, self).__init__()\n", " self.fc1 = nn.Linear(d_model, d_ff)\n", " self.fc2 = nn.Linear(d_ff, d_model)\n", "\n", " def forward(self, x):\n", " # (batch_size, len_seq, d_model) -> (batch_size, len_seq, d_ff) -> (batch_size, len_seq, d_model)\n", " return self.fc2(gelu(self.fc1(x)))\n", "\n", "class EncoderLayer(nn.Module):\n", " def __init__(self):\n", " super(EncoderLayer, self).__init__()\n", " self.enc_self_attn = MultiHeadAttention()\n", " self.pos_ffn = PoswiseFeedForwardNet()\n", "\n", " def forward(self, 
enc_inputs, enc_self_attn_mask):\n", " enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V\n", " enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model]\n", " return enc_outputs, attn\n", "\n", "class BERT(nn.Module):\n", " def __init__(self):\n", " super(BERT, self).__init__()\n", " self.embedding = Embedding()\n", " self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])\n", " self.fc = nn.Linear(d_model, d_model)\n", " self.activ1 = nn.Tanh()\n", " self.linear = nn.Linear(d_model, d_model)\n", " self.activ2 = gelu\n", " self.norm = nn.LayerNorm(d_model)\n", " self.classifier = nn.Linear(d_model, 2)\n", " # decoder is shared with embedding layer\n", " embed_weight = self.embedding.tok_embed.weight\n", " n_vocab, n_dim = embed_weight.size()\n", " self.decoder = nn.Linear(n_dim, n_vocab, bias=False)\n", " self.decoder.weight = embed_weight\n", " self.decoder_bias = nn.Parameter(torch.zeros(n_vocab))\n", "\n", " def forward(self, input_ids, segment_ids, masked_pos):\n", " output = self.embedding(input_ids, segment_ids)\n", " enc_self_attn_mask = get_attn_pad_mask(input_ids, input_ids)\n", " for layer in self.layers:\n", " output, enc_self_attn = layer(output, enc_self_attn_mask)\n", " # output : [batch_size, len, d_model], attn : [batch_size, n_heads, d_mode, d_model]\n", " # it will be decided by first token(CLS)\n", " h_pooled = self.activ1(self.fc(output[:, 0])) # [batch_size, d_model]\n", " logits_clsf = self.classifier(h_pooled) # [batch_size, 2]\n", "\n", " masked_pos = masked_pos[:, :, None].expand(-1, -1, output.size(-1)) # [batch_size, max_pred, d_model]\n", " # get masked position from final output of transformer.\n", " h_masked = torch.gather(output, 1, masked_pos) # masking position [batch_size, max_pred, d_model]\n", " h_masked = self.norm(self.activ2(self.linear(h_masked)))\n", " logits_lm = self.decoder(h_masked) + self.decoder_bias # 
[batch_size, max_pred, n_vocab]\n", "\n", " return logits_lm, logits_clsf\n", "\n", "if __name__ == '__main__':\n", " # BERT Parameters\n", " maxlen = 30 # maximum of length\n", " batch_size = 6\n", " max_pred = 5 # max tokens of prediction\n", " n_layers = 6 # number of Encoder of Encoder Layer\n", " n_heads = 12 # number of heads in Multi-Head Attention\n", " d_model = 768 # Embedding Size\n", " d_ff = 768 * 4 # 4*d_model, FeedForward dimension\n", " d_k = d_v = 64 # dimension of K(=Q), V\n", " n_segments = 2\n", "\n", " text = (\n", " 'Hello, how are you? I am Romeo.\\n'\n", " 'Hello, Romeo My name is Juliet. Nice to meet you.\\n'\n", " 'Nice meet you too. How are you today?\\n'\n", " 'Great. My baseball team won the competition.\\n'\n", " 'Oh Congratulations, Juliet\\n'\n", " 'Thanks you Romeo'\n", " )\n", " sentences = re.sub(\"[.,!?\\\\-]\", '', text.lower()).split('\\n') # filter '.', ',', '?', '!'\n", " word_list = list(set(\" \".join(sentences).split()))\n", " word_dict = {'[PAD]': 0, '[CLS]': 1, '[SEP]': 2, '[MASK]': 3}\n", " for i, w in enumerate(word_list):\n", " word_dict[w] = i + 4\n", " number_dict = {i: w for i, w in enumerate(word_dict)}\n", " vocab_size = len(word_dict)\n", "\n", " token_list = list()\n", " for sentence in sentences:\n", " arr = [word_dict[s] for s in sentence.split()]\n", " token_list.append(arr)\n", "\n", " model = BERT()\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = optim.Adam(model.parameters(), lr=0.001)\n", "\n", " batch = make_batch()\n", " input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(torch.LongTensor, zip(*batch))\n", "\n", " for epoch in range(100):\n", " optimizer.zero_grad()\n", " logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)\n", " loss_lm = criterion(logits_lm.transpose(1, 2), masked_tokens) # for masked LM\n", " loss_lm = (loss_lm.float()).mean()\n", " loss_clsf = criterion(logits_clsf, isNext) # for sentence classification\n", " loss = loss_lm + loss_clsf\n", " if 
(epoch + 1) % 10 == 0:\n", " print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # Predict mask tokens ans isNext\n", " input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(torch.LongTensor, zip(batch[0]))\n", " print(text)\n", " print([number_dict[w.item()] for w in input_ids[0] if number_dict[w.item()] != '[PAD]'])\n", "\n", " logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)\n", " logits_lm = logits_lm.data.max(2)[1][0].data.numpy()\n", " print('masked tokens list : ',[pos.item() for pos in masked_tokens[0] if pos.item() != 0])\n", " print('predict masked tokens list : ',[pos for pos in logits_lm if pos != 0])\n", "\n", " logits_clsf = logits_clsf.data.max(1)[1].data.numpy()[0]\n", " print('isNext : ', True if isNext else False)\n", " print('predict isNext : ',True if logits_clsf else False)\n" ], "outputs": [], "execution_count": null } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: 5-2.BERT/BERT.py ================================================ # %% # code by Tae Hwan Jung(Jeff Jung) @graykode # Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch # https://github.com/JayParks/transformer, https://github.com/dhlee347/pytorchic-bert import math import re from random import * import numpy as np import torch import torch.nn as nn import torch.optim as optim # sample IsNext and NotNext to be same in small batch size def make_batch(): batch = [] positive = negative = 0 while positive != batch_size/2 or negative != batch_size/2: 
tokens_a_index, tokens_b_index= randrange(len(sentences)), randrange(len(sentences)) # sample random index in sentences tokens_a, tokens_b= token_list[tokens_a_index], token_list[tokens_b_index] input_ids = [word_dict['[CLS]']] + tokens_a + [word_dict['[SEP]']] + tokens_b + [word_dict['[SEP]']] segment_ids = [0] * (1 + len(tokens_a) + 1) + [1] * (len(tokens_b) + 1) # MASK LM n_pred = min(max_pred, max(1, int(round(len(input_ids) * 0.15)))) # 15 % of tokens in one sentence cand_maked_pos = [i for i, token in enumerate(input_ids) if token != word_dict['[CLS]'] and token != word_dict['[SEP]']] shuffle(cand_maked_pos) masked_tokens, masked_pos = [], [] for pos in cand_maked_pos[:n_pred]: masked_pos.append(pos) masked_tokens.append(input_ids[pos]) if random() < 0.8: # 80% input_ids[pos] = word_dict['[MASK]'] # make mask elif random() < 0.5: # 10% index = randint(0, vocab_size - 1) # random index in vocabulary input_ids[pos] = word_dict[number_dict[index]] # replace # Zero Paddings n_pad = maxlen - len(input_ids) input_ids.extend([0] * n_pad) segment_ids.extend([0] * n_pad) # Zero Padding (100% - 15%) tokens if max_pred > n_pred: n_pad = max_pred - n_pred masked_tokens.extend([0] * n_pad) masked_pos.extend([0] * n_pad) if tokens_a_index + 1 == tokens_b_index and positive < batch_size/2: batch.append([input_ids, segment_ids, masked_tokens, masked_pos, True]) # IsNext positive += 1 elif tokens_a_index + 1 != tokens_b_index and negative < batch_size/2: batch.append([input_ids, segment_ids, masked_tokens, masked_pos, False]) # NotNext negative += 1 return batch # Proprecessing Finished def get_attn_pad_mask(seq_q, seq_k): batch_size, len_q = seq_q.size() batch_size, len_k = seq_k.size() # eq(zero) is PAD token pad_attn_mask = seq_k.data.eq(0).unsqueeze(1) # batch_size x 1 x len_k(=len_q), one is masking return pad_attn_mask.expand(batch_size, len_q, len_k) # batch_size x len_q x len_k def gelu(x): "Implementation of the gelu activation function by Hugging Face" return x * 
0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) class Embedding(nn.Module): def __init__(self): super(Embedding, self).__init__() self.tok_embed = nn.Embedding(vocab_size, d_model) # token embedding self.pos_embed = nn.Embedding(maxlen, d_model) # position embedding self.seg_embed = nn.Embedding(n_segments, d_model) # segment(token type) embedding self.norm = nn.LayerNorm(d_model) def forward(self, x, seg): seq_len = x.size(1) pos = torch.arange(seq_len, dtype=torch.long) pos = pos.unsqueeze(0).expand_as(x) # (seq_len,) -> (batch_size, seq_len) embedding = self.tok_embed(x) + self.pos_embed(pos) + self.seg_embed(seg) return self.norm(embedding) class ScaledDotProductAttention(nn.Module): def __init__(self): super(ScaledDotProductAttention, self).__init__() def forward(self, Q, K, V, attn_mask): scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)] scores.masked_fill_(attn_mask, -1e9) # Fills elements of self tensor with value where mask is one. 
attn = nn.Softmax(dim=-1)(scores) context = torch.matmul(attn, V) return context, attn class MultiHeadAttention(nn.Module): def __init__(self): super(MultiHeadAttention, self).__init__() self.W_Q = nn.Linear(d_model, d_k * n_heads) self.W_K = nn.Linear(d_model, d_k * n_heads) self.W_V = nn.Linear(d_model, d_v * n_heads) def forward(self, Q, K, V, attn_mask): # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model] residual, batch_size = Q, Q.size(0) # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W) q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1,2) # q_s: [batch_size x n_heads x len_q x d_k] k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1,2) # k_s: [batch_size x n_heads x len_k x d_k] v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1,2) # v_s: [batch_size x n_heads x len_k x d_v] attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size x n_heads x len_q x len_k] # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)] context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask) context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v) # context: [batch_size x len_q x n_heads * d_v] output = nn.Linear(n_heads * d_v, d_model)(context) return nn.LayerNorm(d_model)(output + residual), attn # output: [batch_size x len_q x d_model] class PoswiseFeedForwardNet(nn.Module): def __init__(self): super(PoswiseFeedForwardNet, self).__init__() self.fc1 = nn.Linear(d_model, d_ff) self.fc2 = nn.Linear(d_ff, d_model) def forward(self, x): # (batch_size, len_seq, d_model) -> (batch_size, len_seq, d_ff) -> (batch_size, len_seq, d_model) return self.fc2(gelu(self.fc1(x))) class EncoderLayer(nn.Module): def __init__(self): super(EncoderLayer, self).__init__() self.enc_self_attn = MultiHeadAttention() self.pos_ffn = PoswiseFeedForwardNet() def 
forward(self, enc_inputs, enc_self_attn_mask): enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask) # enc_inputs to same Q,K,V enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size x len_q x d_model] return enc_outputs, attn class BERT(nn.Module): def __init__(self): super(BERT, self).__init__() self.embedding = Embedding() self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)]) self.fc = nn.Linear(d_model, d_model) self.activ1 = nn.Tanh() self.linear = nn.Linear(d_model, d_model) self.activ2 = gelu self.norm = nn.LayerNorm(d_model) self.classifier = nn.Linear(d_model, 2) # decoder is shared with embedding layer embed_weight = self.embedding.tok_embed.weight n_vocab, n_dim = embed_weight.size() self.decoder = nn.Linear(n_dim, n_vocab, bias=False) self.decoder.weight = embed_weight self.decoder_bias = nn.Parameter(torch.zeros(n_vocab)) def forward(self, input_ids, segment_ids, masked_pos): output = self.embedding(input_ids, segment_ids) enc_self_attn_mask = get_attn_pad_mask(input_ids, input_ids) for layer in self.layers: output, enc_self_attn = layer(output, enc_self_attn_mask) # output : [batch_size, len, d_model], attn : [batch_size, n_heads, d_mode, d_model] # it will be decided by first token(CLS) h_pooled = self.activ1(self.fc(output[:, 0])) # [batch_size, d_model] logits_clsf = self.classifier(h_pooled) # [batch_size, 2] masked_pos = masked_pos[:, :, None].expand(-1, -1, output.size(-1)) # [batch_size, max_pred, d_model] # get masked position from final output of transformer. 
h_masked = torch.gather(output, 1, masked_pos) # masking position [batch_size, max_pred, d_model] h_masked = self.norm(self.activ2(self.linear(h_masked))) logits_lm = self.decoder(h_masked) + self.decoder_bias # [batch_size, max_pred, n_vocab] return logits_lm, logits_clsf if __name__ == '__main__': # BERT Parameters maxlen = 30 # maximum of length batch_size = 6 max_pred = 5 # max tokens of prediction n_layers = 6 # number of Encoder of Encoder Layer n_heads = 12 # number of heads in Multi-Head Attention d_model = 768 # Embedding Size d_ff = 768 * 4 # 4*d_model, FeedForward dimension d_k = d_v = 64 # dimension of K(=Q), V n_segments = 2 text = ( 'Hello, how are you? I am Romeo.\n' 'Hello, Romeo My name is Juliet. Nice to meet you.\n' 'Nice meet you too. How are you today?\n' 'Great. My baseball team won the competition.\n' 'Oh Congratulations, Juliet\n' 'Thanks you Romeo' ) sentences = re.sub("[.,!?\\-]", '', text.lower()).split('\n') # filter '.', ',', '?', '!' word_list = list(set(" ".join(sentences).split())) word_dict = {'[PAD]': 0, '[CLS]': 1, '[SEP]': 2, '[MASK]': 3} for i, w in enumerate(word_list): word_dict[w] = i + 4 number_dict = {i: w for i, w in enumerate(word_dict)} vocab_size = len(word_dict) token_list = list() for sentence in sentences: arr = [word_dict[s] for s in sentence.split()] token_list.append(arr) model = BERT() criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.001) batch = make_batch() input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(torch.LongTensor, zip(*batch)) for epoch in range(100): optimizer.zero_grad() logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos) loss_lm = criterion(logits_lm.transpose(1, 2), masked_tokens) # for masked LM loss_lm = (loss_lm.float()).mean() loss_clsf = criterion(logits_clsf, isNext) # for sentence classification loss = loss_lm + loss_clsf if (epoch + 1) % 10 == 0: print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss)) 
loss.backward() optimizer.step() # Predict mask tokens ans isNext input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(torch.LongTensor, zip(batch[0])) print(text) print([number_dict[w.item()] for w in input_ids[0] if number_dict[w.item()] != '[PAD]']) logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos) logits_lm = logits_lm.data.max(2)[1][0].data.numpy() print('masked tokens list : ',[pos.item() for pos in masked_tokens[0] if pos.item() != 0]) print('predict masked tokens list : ',[pos for pos in logits_lm if pos != 0]) logits_clsf = logits_clsf.data.max(1)[1].data.numpy()[0] print('isNext : ', True if isNext else False) print('predict isNext : ',True if logits_clsf else False) ================================================ FILE: CONTRIBUTING.md ================================================ ## Contribution Guidelines Thank you to everyone who contributes. Here are some rules to follow before contributing. 1. Contributions are open to the smallest details such as typos, comments and code refactors. 2. Do not commit the jupyter notebook file(*.ipynb). When the modified python code is merged into the master branch, the github action automatically generates an ipynb. 3. Please attach a commit message appropriate to the modified code. ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2019 TaeHwan Jung Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ ## nlp-tutorial

`nlp-tutorial` is a tutorial for those who are studying NLP (Natural Language Processing) using **PyTorch**. Most of the NLP models are implemented in fewer than **100 lines** of code (excluding comments and blank lines). - [08-14-2020] Old TensorFlow v1 code is archived in [the archive folder](archive). For beginner readability, only PyTorch version 1.0 or higher is supported. ## Curriculum - (Example Purpose) #### 1. Basic Embedding Model - 1-1. [NNLM(Neural Network Language Model)](1-1.NNLM) - **Predict Next Word** - Paper - [A Neural Probabilistic Language Model(2003)](http://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf) - Colab - [NNLM.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/1-1.NNLM/NNLM.ipynb) - 1-2. [Word2Vec(Skip-gram)](1-2.Word2Vec) - **Embedding Words and Show Graph** - Paper - [Distributed Representations of Words and Phrases and their Compositionality(2013)](https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf) - Colab - [Word2Vec.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/1-2.Word2Vec/Word2Vec-Skipgram(Softmax).ipynb) - 1-3. [FastText(Application Level)](1-3.FastText) - **Sentence Classification** - Paper - [Bag of Tricks for Efficient Text Classification(2016)](https://arxiv.org/pdf/1607.01759.pdf) - Colab - [FastText.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/1-3.FastText/FastText.ipynb) #### 2. CNN(Convolutional Neural Network) - 2-1. [TextCNN](2-1.TextCNN) - **Binary Sentiment Classification** - Paper - [Convolutional Neural Networks for Sentence Classification(2014)](http://www.aclweb.org/anthology/D14-1181) - Colab - [TextCNN.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/2-1.TextCNN/TextCNN.ipynb) #### 3. RNN(Recurrent Neural Network) - 3-1. 
[TextRNN](3-1.TextRNN) - **Predict Next Step** - Paper - [Finding Structure in Time(1990)](http://psych.colorado.edu/~kimlab/Elman1990.pdf) - Colab - [TextRNN.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/3-1.TextRNN/TextRNN.ipynb) - 3-2. [TextLSTM](3-2.TextLSTM) - **Autocomplete** - Paper - [LONG SHORT-TERM MEMORY(1997)](https://www.bioinf.jku.at/publications/older/2604.pdf) - Colab - [TextLSTM.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/3-2.TextLSTM/TextLSTM.ipynb) - 3-3. [Bi-LSTM](3-3.Bi-LSTM) - **Predict Next Word in Long Sentence** - Colab - [Bi-LSTM.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/3-3.Bi-LSTM/Bi-LSTM.ipynb) #### 4. Attention Mechanism - 4-1. [Seq2Seq](4-1.Seq2Seq) - **Change Word** - Paper - [Learning Phrase Representations using RNN Encoder–Decoder for Statistical Machine Translation(2014)](https://arxiv.org/pdf/1406.1078.pdf) - Colab - [Seq2Seq.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/4-1.Seq2Seq/Seq2Seq.ipynb) - 4-2. [Seq2Seq with Attention](4-2.Seq2Seq(Attention)) - **Translate** - Paper - [Neural Machine Translation by Jointly Learning to Align and Translate(2014)](https://arxiv.org/abs/1409.0473) - Colab - [Seq2Seq(Attention).ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/4-2.Seq2Seq(Attention)/Seq2Seq(Attention).ipynb) - 4-3. [Bi-LSTM with Attention](4-3.Bi-LSTM(Attention)) - **Binary Sentiment Classification** - Colab - [Bi-LSTM(Attention).ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).ipynb) #### 5. Model based on Transformer - 5-1. 
[The Transformer](5-1.Transformer) - **Translate** - Paper - [Attention Is All You Need(2017)](https://arxiv.org/abs/1706.03762) - Colab - [Transformer.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/5-1.Transformer/Transformer.ipynb), [Transformer(Greedy_decoder).ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/5-1.Transformer/Transformer(Greedy_decoder).ipynb) - 5-2. [BERT](5-2.BERT) - **Classification Next Sentence & Predict Masked Tokens** - Paper - [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding(2018)](https://arxiv.org/abs/1810.04805) - Colab - [BERT.ipynb](https://colab.research.google.com/github/graykode/nlp-tutorial/blob/master/5-2.BERT/BERT.ipynb) ## Dependencies - Python 3.5+ - Pytorch 1.0.0+ ## Author - Tae Hwan Jung(Jeff Jung) @graykode - Author Email : nlkey2022@gmail.com - Acknowledgements to [mojitok](http://mojitok.com/) as NLP Research Internship. ================================================ FILE: archive/tensorflow/v1/1-1.NNLM/NNLM.py ================================================ # code by Tae Hwan Jung @graykode import tensorflow as tf import numpy as np tf.reset_default_graph() sentences = [ "i like dog", "i love coffee", "i hate milk"] word_list = " ".join(sentences).split() word_list = list(set(word_list)) word_dict = {w: i for i, w in enumerate(word_list)} number_dict = {i: w for i, w in enumerate(word_list)} n_class = len(word_dict) # number of Vocabulary # NNLM Parameter n_step = 2 # number of steps ['i like', 'i love', 'i hate'] n_hidden = 2 # number of hidden units def make_batch(sentences): input_batch = [] target_batch = [] for sen in sentences: word = sen.split() input = [word_dict[n] for n in word[:-1]] target = word_dict[word[-1]] input_batch.append(np.eye(n_class)[input]) target_batch.append(np.eye(n_class)[target]) return input_batch, target_batch # Model X = tf.placeholder(tf.float32, [None, n_step, n_class]) # [batch_size, 
number of steps, number of Vocabulary] Y = tf.placeholder(tf.float32, [None, n_class]) input = tf.reshape(X, shape=[-1, n_step * n_class]) # [batch_size, n_step * n_class] H = tf.Variable(tf.random_normal([n_step * n_class, n_hidden])) d = tf.Variable(tf.random_normal([n_hidden])) U = tf.Variable(tf.random_normal([n_hidden, n_class])) b = tf.Variable(tf.random_normal([n_class])) tanh = tf.nn.tanh(d + tf.matmul(input, H)) # [batch_size, n_hidden] model = tf.matmul(tanh, U) + b # [batch_size, n_class] cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y)) optimizer = tf.train.AdamOptimizer(0.001).minimize(cost) prediction =tf.argmax(model, 1) # Training init = tf.global_variables_initializer() sess = tf.Session() sess.run(init) input_batch, target_batch = make_batch(sentences) for epoch in range(5000): _, loss = sess.run([optimizer, cost], feed_dict={X: input_batch, Y: target_batch}) if (epoch + 1)%1000 == 0: print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss)) # Predict predict = sess.run([prediction], feed_dict={X: input_batch}) # Test input = [sen.split()[:2] for sen in sentences] print([sen.split()[:2] for sen in sentences], '->', [number_dict[n] for n in predict[0]]) ================================================ FILE: archive/tensorflow/v1/1-2.Word2Vec/Word2Vec-Skipgram(NCE_loss).py ================================================ ''' code by Tae Hwan Jung(Jeff Jung) @graykode reference : https://github.com/golbin/TensorFlow-Tutorials/blob/master/04%20-%20Neural%20Network%20Basic/03%20-%20Word2Vec.py ''' import tensorflow as tf import matplotlib.pyplot as plt import numpy as np tf.reset_default_graph() # 3 Words Sentence sentences = [ "i like dog", "i like cat", "i like animal", "dog cat animal", "apple cat dog like", "dog fish milk like", "dog cat eyes like", "i like apple", "apple i hate", "apple i movie book music like", "cat dog hate", "cat dog like"] word_sequence = " ".join(sentences).split() 
# --- Word2Vec skip-gram (NCE loss), continued: vocabulary, batching, model, training ---
# Build the vocabulary: every distinct token gets an integer id.
word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}

# Word2Vec Parameter
batch_size = 20
embedding_size = 2 # To show 2 dim embedding graph
num_sampled = 10 # for negative sampling, less than batch_size
voc_size = len(word_list)

def random_batch(data, size):
    """Draw `size` distinct (target, context) pairs from `data`.

    Returns a tuple (inputs, labels): `inputs` is a list of target ids and
    `labels` is a list of one-element lists holding the context id, which
    matches the [batch_size, 1] label placeholder defined below.
    """
    random_inputs = []
    random_labels = []
    # Sample indices without replacement so a pair appears at most once per batch.
    random_index = np.random.choice(range(len(data)), size, replace=False)

    for i in random_index:
        random_inputs.append(data[i][0]) # target
        random_labels.append([data[i][1]]) # context word

    return random_inputs, random_labels

# Make skip gram of one size window
skip_grams = []
# The first and last tokens are skipped as targets because they lack a neighbour on one side.
for i in range(1, len(word_sequence) - 1):
    target = word_dict[word_sequence[i]]
    context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]

    for w in context:
        skip_grams.append([target, w])

# Model
inputs = tf.placeholder(tf.int32, shape=[batch_size])
labels = tf.placeholder(tf.int32, shape=[batch_size, 1]) # To use tf.nn.nce_loss, [batch_size, 1]

embeddings = tf.Variable(tf.random_uniform([voc_size, embedding_size], -1.0, 1.0))
selected_embed = tf.nn.embedding_lookup(embeddings, inputs)

nce_weights = tf.Variable(tf.random_uniform([voc_size, embedding_size], -1.0, 1.0))
nce_biases = tf.Variable(tf.zeros([voc_size]))

# Loss and optimizer
cost = tf.reduce_mean(tf.nn.nce_loss(nce_weights, nce_biases, labels, selected_embed, num_sampled, voc_size))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

# Training
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    for epoch in range(5000):
        batch_inputs, batch_labels = random_batch(skip_grams, batch_size)
        _, loss = sess.run([optimizer, cost], feed_dict={inputs: batch_inputs, labels: batch_labels})

        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    # Read the embedding matrix while the session is still open.
    trained_embeddings = embeddings.eval()

# Plot each word at its learned 2-d embedding.
for i, label in enumerate(word_list):
    x, y = trained_embeddings[i]
    plt.scatter(x, y)
    plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')
plt.show()

================================================
FILE: archive/tensorflow/v1/1-2.Word2Vec/Word2Vec-Skipgram(Softmax).py
================================================
''' code by Tae Hwan Jung(Jeff Jung) @graykode '''
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

tf.reset_default_graph()

# Training corpus: short space-tokenised sentences.
sentences = [ "i like dog", "i like cat", "i like animal",
              "dog cat animal", "apple cat dog like", "dog fish milk like",
              "dog cat eyes like", "i like apple", "apple i hate",
              "apple i movie book music like", "cat dog hate", "cat dog like"]

# word_sequence keeps corpus order (used for the context windows);
# word_list is deduplicated and defines the vocabulary ids.
word_sequence = " ".join(sentences).split()
word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}

# Word2Vec Parameter
batch_size = 20
embedding_size = 2 # To show 2 dim embedding graph
voc_size = len(word_list)

def random_batch(data, size):
    """Draw `size` distinct (target, context) pairs from `data` as one-hot rows.

    Returns a tuple (inputs, labels) of lists; each element is a length
    `voc_size` one-hot vector taken from an identity matrix.
    """
    random_inputs = []
    random_labels = []
    # Sample indices without replacement so a pair appears at most once per batch.
    random_index = np.random.choice(range(len(data)), size, replace=False)

    for i in random_index:
        random_inputs.append(np.eye(voc_size)[data[i][0]]) # target
        random_labels.append(np.eye(voc_size)[data[i][1]]) # context word

    return random_inputs, random_labels

# Make skip gram of one size window
skip_grams = []
# The first and last tokens are skipped as targets because they lack a neighbour on one side.
for i in range(1, len(word_sequence) - 1):
    target = word_dict[word_sequence[i]]
    context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]

    for w in context:
        skip_grams.append([target, w])

# Model
inputs = tf.placeholder(tf.float32, shape=[None, voc_size])
labels = tf.placeholder(tf.float32, shape=[None, voc_size])

# W and WT are two independent variables; WT is not the transpose of W.
W = tf.Variable(tf.random_uniform([voc_size, embedding_size], -1.0, 1.0))
WT = tf.Variable(tf.random_uniform([embedding_size, voc_size], -1.0, 1.0))

hidden_layer = tf.matmul(inputs, W) # [batch_size, embedding_size]
output_layer = tf.matmul(hidden_layer, WT) # [batch_size, voc_size]

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=output_layer, labels=labels))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    for epoch in range(5000):
        batch_inputs, batch_labels = random_batch(skip_grams, batch_size)
        _, loss = sess.run([optimizer, cost], feed_dict={inputs: batch_inputs, labels: batch_labels})

        if (epoch + 1)%1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    # Rows of W are the learned word embeddings; read them while the session is open.
    trained_embeddings = W.eval()

# Plot each word at its learned 2-d embedding.
for i, label in enumerate(word_list):
    x, y = trained_embeddings[i]
    plt.scatter(x, y)
    plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')
plt.show()

================================================
FILE: archive/tensorflow/v1/2-1.TextCNN/TextCNN.py
================================================
''' code by Tae Hwan Jung(Jeff Jung) @graykode Reference : https://github.com/ioatr/textcnn '''
import tensorflow as tf
import numpy as np

tf.reset_default_graph()

# Text-CNN Parameter
embedding_size = 2 # width of each word embedding vector
sequence_length = 3
num_classes = 2 # 0 or 1
filter_sizes = [2,2,2] # n-gram window
num_filters = 3

# 3 words sentences (=sequence_length is 3)
sentences = ["i love you","he loves me", "she likes baseball", "i hate you","sorry for that", "this is awful"]
labels = [1,1,1,0,0,0] # 1 is good, 0 is not good.
# Build the vocabulary: every distinct token gets an integer id.
word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
vocab_size = len(word_dict)

# Each sentence becomes an array of word ids.
inputs = [np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences]

# One-hot targets, as required by the softmax cross-entropy loss.
outputs = [np.eye(num_classes)[out] for out in labels]

# Model
X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, num_classes])

W = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0))
embedded_chars = tf.nn.embedding_lookup(W, X) # [batch_size, sequence_length, embedding_size]
embedded_chars = tf.expand_dims(embedded_chars, -1) # add channel(=1) [batch_size, sequence_length, embedding_size, 1]

pooled_outputs = []
for filter_size in filter_sizes:
    # NOTE: W is rebound here to the convolution filter; the embedding
    # variable above stays in the graph but is no longer reachable by name.
    filter_shape = [filter_size, embedding_size, 1, num_filters]
    W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[num_filters]))

    conv = tf.nn.conv2d(embedded_chars, # [batch_size, sequence_length, embedding_size, 1]
                        W,              # [filter_size(n-gram window), embedding_size, 1, num_filters(=3)]
                        strides=[1, 1, 1, 1],
                        padding='VALID')
    h = tf.nn.relu(tf.nn.bias_add(conv, b))
    # Max-pool over every window position so each filter yields one value.
    pooled = tf.nn.max_pool(h,
                            ksize=[1, sequence_length - filter_size + 1, 1, 1], # [batch_size, filter_height, filter_width, channel]
                            strides=[1, 1, 1, 1],
                            padding='VALID')
    pooled_outputs.append(pooled) # dim of pooled : [batch_size, output_height(=1), output_width(=1), num_filters]

num_filters_total = num_filters * len(filter_sizes)
# Concatenate along the channel axis (index 3). The previous code passed
# num_filters as the axis, which only worked because num_filters happened to be 3.
h_pool = tf.concat(pooled_outputs, axis=3) # h_pool : [batch_size, 1, 1, num_filters * len(filter_sizes)]
h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total]) # [batch_size, num_filters_total]

# Model-Training
Weight = tf.get_variable('W', shape=[num_filters_total, num_classes],
                         initializer=tf.contrib.layers.xavier_initializer())
Bias = tf.Variable(tf.constant(0.1, shape=[num_classes]))
# --- TextCNN, continued: logits, loss, training loop and a single test sentence ---
model = tf.nn.xw_plus_b(h_pool_flat, Weight, Bias)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

# Model-Predict
hypothesis = tf.nn.softmax(model)
predictions = tf.argmax(hypothesis, 1)
# Training
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Every step trains on the full set of sentences.
for epoch in range(5000):
    _, loss = sess.run([optimizer, cost], feed_dict={X: inputs, Y: outputs})
    if (epoch + 1)%1000 == 0:
        print('Epoch:', '%06d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

# Test
test_text = 'sorry hate you'
tests = []
tests.append(np.asarray([word_dict[n] for n in test_text.split()]))

predict = sess.run([predictions], feed_dict={X: tests})
# predict is a one-element list (one fetch); [0][0] is the class of the single test sentence.
result = predict[0][0]
if result == 0:
    print(test_text,"is Bad Mean...")
else:
    print(test_text,"is Good Mean!!")

================================================
FILE: archive/tensorflow/v1/3-1.TextRNN/TextRNN.py
================================================
''' code by Tae Hwan Jung(Jeff Jung) @graykode '''
import tensorflow as tf
import numpy as np

tf.reset_default_graph()

sentences = [ "i like dog", "i love coffee", "i hate milk"]

# Vocabulary: word -> id and the inverse id -> word mapping.
word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict)

# TextRNN Parameter
n_step = 2 # number of cells(= number of Step)
n_hidden = 5 # number of hidden units in one cell

def make_batch(sentences):
    """Turn each sentence into (one-hot inputs, one-hot target).

    All words but the last form the input sequence; the last word is the
    target. Returns a tuple (input_batch, target_batch) of lists.
    """
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] for n in word[:-1]]
        target = word_dict[word[-1]]

        # Indexing the identity matrix with ids yields one-hot rows.
        input_batch.append(np.eye(n_class)[input])
        target_batch.append(np.eye(n_class)[target])

    return input_batch, target_batch

# Model
X = tf.placeholder(tf.float32, [None, n_step, n_class]) # [batch_size, n_step, n_class]
Y = tf.placeholder(tf.float32, [None, n_class]) # [batch_size, n_class]

W = tf.Variable(tf.random_normal([n_hidden, n_class]))
b = tf.Variable(tf.random_normal([n_class]))

cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# outputs : [batch_size, n_step, n_hidden]
outputs = tf.transpose(outputs, [1, 0, 2]) # [n_step, batch_size, n_hidden]
outputs = outputs[-1] # keep only the last time step : [batch_size, n_hidden]
model = tf.matmul(outputs, W) + b # model : [batch_size, n_class]

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

prediction = tf.cast(tf.argmax(model, 1), tf.int32)

# Training
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

input_batch, target_batch = make_batch(sentences)

for epoch in range(5000):
    _, loss = sess.run([optimizer, cost], feed_dict={X: input_batch, Y: target_batch})
    if (epoch + 1)%1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

# NOTE(review): `input` is not read below (the print recomputes the same list)
# and the name shadows the builtin.
input = [sen.split()[:2] for sen in sentences]

# Predict the last word of each training sentence from its first two words.
predict = sess.run([prediction], feed_dict={X: input_batch})
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n] for n in predict[0]])

================================================
FILE: archive/tensorflow/v1/3-2.TextLSTM/TextLSTM.py
================================================
''' code by Tae Hwan Jung(Jeff Jung) @graykode '''
import tensorflow as tf
import numpy as np

tf.reset_default_graph()

# Character-level vocabulary over the lowercase alphabet.
char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']
word_dict = {n: i for i, n in enumerate(char_arr)}
number_dict = {i: w for i, w in enumerate(char_arr)}
n_class = len(word_dict) # number of class(=number of vocab)

seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']

# TextLSTM Parameters
n_step = 3
n_hidden = 128

def make_batch(seq_data):
    """Turn each word into (one-hot input characters, one-hot target character).

    All characters but the last form the input; the last character is the
    target. Returns a tuple (input_batch, target_batch) of lists.
    """
    input_batch, target_batch = [], []

    for seq in seq_data:
        input = [word_dict[n] for n in seq[:-1]] # 'm', 'a' , 'k' is input
        target = word_dict[seq[-1]] # 'e' is target
        # Indexing the identity matrix with ids yields one-hot rows.
        input_batch.append(np.eye(n_class)[input])
        target_batch.append(np.eye(n_class)[target])

    return input_batch, target_batch

# Model
X = tf.placeholder(tf.float32, [None, n_step, n_class]) # [batch_size, n_step, n_class]
Y = tf.placeholder(tf.float32, [None, n_class]) # [batch_size, n_class]

W = tf.Variable(tf.random_normal([n_hidden, n_class]))
b = tf.Variable(tf.random_normal([n_class]))

cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# outputs : [batch_size, n_step, n_hidden]
outputs = tf.transpose(outputs, [1, 0, 2]) # [n_step, batch_size, n_hidden]
outputs = outputs[-1] # keep only the last time step : [batch_size, n_hidden]
model = tf.matmul(outputs, W) + b # model : [batch_size, n_class]

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

prediction = tf.cast(tf.argmax(model, 1), tf.int32)

# Training
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

input_batch, target_batch = make_batch(seq_data)

for epoch in range(1000):
    _, loss = sess.run([optimizer, cost], feed_dict={X: input_batch, Y: target_batch})
    if (epoch + 1)%100 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

# First three characters of every word, shown next to the predicted fourth.
inputs = [sen[:3] for sen in seq_data]

predict = sess.run([prediction], feed_dict={X: input_batch})
print(inputs, '->', [number_dict[n] for n in predict[0]])

================================================
FILE: archive/tensorflow/v1/3-3.Bi-LSTM/Bi-LSTM.py
================================================
''' code by Tae Hwan Jung(Jeff Jung) @graykode '''
import tensorflow as tf
import numpy as np

tf.reset_default_graph()

sentence = (
    'Lorem ipsum dolor sit amet consectetur adipisicing elit '
    'sed do eiusmod tempor incididunt ut labore et dolore magna '
    'aliqua Ut enim ad minim veniam quis nostrud exercitation'
)

# Both mappings enumerate a set built from the same words.
# NOTE(review): they rely on both set() calls iterating in the same order — confirm.
word_dict = {w: i for i, w in enumerate(list(set(sentence.split())))}
number_dict = {i: w for i, w in enumerate(list(set(sentence.split())))}
n_class = len(word_dict)
n_step = len(sentence.split())
n_hidden = 5

def make_batch(sentence):
    """Build next-word prediction samples from every prefix of `sentence`.

    For each position i the input is the first i + 1 word ids, right-padded
    with 0 to `n_step`, and the target is word i + 1. Inputs and targets are
    returned as one-hot arrays in a tuple (input_batch, target_batch).
    """
    input_batch = []
    target_batch = []

    words = sentence.split()
    for i, word in enumerate(words[:-1]):
        input = [word_dict[n] for n in words[:(i + 1)]]
        # NOTE(review): the pad id 0 is also the id of a real vocabulary word.
        input = input + [0] * (n_step - len(input))
        target = word_dict[words[i + 1]]
        input_batch.append(np.eye(n_class)[input])
        target_batch.append(np.eye(n_class)[target])

    return input_batch, target_batch

# Bi-LSTM Model
X = tf.placeholder(tf.float32, [None, n_step, n_class])
Y = tf.placeholder(tf.float32, [None, n_class])

W = tf.Variable(tf.random_normal([n_hidden * 2, n_class]))
b = tf.Variable(tf.random_normal([n_class]))

lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(n_hidden)
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(n_hidden)

# outputs : pair of [batch_size, len_seq, n_hidden] tensors (forward, backward)
outputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,lstm_bw_cell, X, dtype=tf.float32)

outputs = tf.concat([outputs[0], outputs[1]], 2) # output[0] : lstm_fw, output[1] : lstm_bw
outputs = tf.transpose(outputs, [1, 0, 2]) # [n_step, batch_size, n_hidden * 2]
outputs = outputs[-1] # keep only the last time step : [batch_size, n_hidden * 2]

model = tf.matmul(outputs, W) + b

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

prediction = tf.cast(tf.argmax(model, 1), tf.int32)

# Training
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

input_batch, target_batch = make_batch(sentence)

for epoch in range(10000):
    _, loss = sess.run([optimizer, cost], feed_dict={X: input_batch, Y: target_batch})
    if (epoch + 1)%1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

# Print the original sentence followed by the word predicted after each prefix.
predict = sess.run([prediction], feed_dict={X: input_batch})
print(sentence)
print([number_dict[n] for n in [pre for pre in predict[0]]])

================================================
FILE: archive/tensorflow/v1/4-1.Seq2Seq/Seq2Seq.py
================================================ ''' code by Tae Hwan Jung(Jeff Jung) @graykode reference : https://github.com/golbin/TensorFlow-Tutorials/blob/master/10%20-%20RNN/03%20-%20Seq2Seq.py ''' import tensorflow as tf import numpy as np tf.reset_default_graph() # S: Symbol that shows starting of decoding input # E: Symbol that shows starting of decoding output # P: Symbol that will fill in blank sequence if current batch data size is short than time steps char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz'] num_dic = {n: i for i, n in enumerate(char_arr)} seq_data = [['man', 'women'], ['black', 'white'], ['king', 'queen'], ['girl', 'boy'], ['up', 'down'], ['high', 'low']] # Seq2Seq Parameter n_step = 5 n_hidden = 128 n_class = len(num_dic) # number of class(=number of vocab) def make_batch(seq_data): input_batch, output_batch, target_batch = [], [], [] for seq in seq_data: for i in range(2): seq[i] = seq[i] + 'P' * (n_step - len(seq[i])) input = [num_dic[n] for n in seq[0]] output = [num_dic[n] for n in ('S' + seq[1])] target = [num_dic[n] for n in (seq[1] + 'E')] input_batch.append(np.eye(n_class)[input]) output_batch.append(np.eye(n_class)[output]) target_batch.append(target) return input_batch, output_batch, target_batch # Model enc_input = tf.placeholder(tf.float32, [None, None, n_class]) # [batch_size, max_len(=encoder_step), n_class] dec_input = tf.placeholder(tf.float32, [None, None, n_class]) # [batch_size, max_len+1(=decoder_step) (becase of 'S' or 'E'), n_class] targets = tf.placeholder(tf.int64, [None, None]) # [batch_size, max_len+1], not one-hot with tf.variable_scope('encode'): enc_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden) enc_cell = tf.nn.rnn_cell.DropoutWrapper(enc_cell, output_keep_prob=0.5) _, enc_states = tf.nn.dynamic_rnn(enc_cell, enc_input, dtype=tf.float32) # encoder state will go to decoder initial_state, enc_states : [batch_size, n_hidden(=128)] with tf.variable_scope('decode'): dec_cell = 
tf.nn.rnn_cell.BasicRNNCell(n_hidden) dec_cell = tf.nn.rnn_cell.DropoutWrapper(dec_cell, output_keep_prob=0.5) outputs, _ = tf.nn.dynamic_rnn(dec_cell, dec_input, initial_state=enc_states, dtype=tf.float32) # outputs : [batch_size, max_len+1, n_hidden(=128)] model = tf.layers.dense(outputs, n_class, activation=None) # model : [batch_size, max_len+1, n_class] cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model, labels=targets)) optimizer = tf.train.AdamOptimizer(0.001).minimize(cost) # Training sess = tf.Session() sess.run(tf.global_variables_initializer()) input_batch, output_batch, target_batch = make_batch(seq_data) for epoch in range(5000): _, loss = sess.run([optimizer, cost], feed_dict={enc_input: input_batch, dec_input: output_batch, targets: target_batch}) if (epoch + 1)%1000 == 0: print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss)) # Test def translate(word): seq_data = [word, 'P' * len(word)] input_batch, output_batch, _ = make_batch([seq_data]) prediction = tf.argmax(model, 2) result = sess.run(prediction, feed_dict={enc_input: input_batch, dec_input: output_batch}) decoded = [char_arr[i] for i in result[0]] end = decoded.index('E') translated = ''.join(decoded[:end]) return translated.replace('P','') print('test') print('man ->', translate('man')) print('mans ->', translate('mans')) print('king ->', translate('king')) print('black ->', translate('black')) print('upp ->', translate('upp')) ================================================ FILE: archive/tensorflow/v1/4-2.Seq2Seq(Attention)/Seq2Seq(Attention).py ================================================ # code by Tae Hwan Jung(Jeff Jung) @graykode import tensorflow as tf import matplotlib.pyplot as plt import numpy as np tf.reset_default_graph() # S: Symbol that shows starting of decoding input # E: Symbol that shows starting of decoding output # P: Symbol that will fill in blank sequence if current batch data size is short than time steps sentences = 
['ich mochte ein bier P', 'S i want a beer', 'i want a beer E'] word_list = " ".join(sentences).split() word_list = list(set(word_list)) word_dict = {w: i for i, w in enumerate(word_list)} number_dict = {i: w for i, w in enumerate(word_list)} n_class = len(word_dict) # vocab list # Parameter n_step = 5 # maxium number of words in one sentence(=number of time steps) n_hidden = 128 def make_batch(sentences): input_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[0].split()]]] output_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[1].split()]]] target_batch = [[word_dict[n] for n in sentences[2].split()]] return input_batch, output_batch, target_batch # Model enc_inputs = tf.placeholder(tf.float32, [None, None, n_class]) # [batch_size, n_step, n_class] dec_inputs = tf.placeholder(tf.float32, [None, None, n_class]) # [batch_size, n_step, n_class] targets = tf.placeholder(tf.int64, [1, n_step]) # [batch_size, n_step], not one-hot # Linear for attention attn = tf.Variable(tf.random_normal([n_hidden, n_hidden])) out = tf.Variable(tf.random_normal([n_hidden * 2, n_class])) def get_att_score(dec_output, enc_output): # enc_output [n_step, n_hidden] score = tf.squeeze(tf.matmul(enc_output, attn), 0) # score : [n_hidden] dec_output = tf.squeeze(dec_output, [0, 1]) # dec_output : [n_hidden] return tf.tensordot(dec_output, score, 1) # inner product make scalar value def get_att_weight(dec_output, enc_outputs): attn_scores = [] # list of attention scalar : [n_step] enc_outputs = tf.transpose(enc_outputs, [1, 0, 2]) # enc_outputs : [n_step, batch_size, n_hidden] for i in range(n_step): attn_scores.append(get_att_score(dec_output, enc_outputs[i])) # Normalize scores to weights in range 0 to 1 return tf.reshape(tf.nn.softmax(attn_scores), [1, 1, -1]) # [1, 1, n_step] model = [] Attention = [] with tf.variable_scope('encode'): enc_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden) enc_cell = tf.nn.rnn_cell.DropoutWrapper(enc_cell, output_keep_prob=0.5) # enc_outputs : 
[batch_size(=1), n_step(=decoder_step), n_hidden(=128)] # enc_hidden : [batch_size(=1), n_hidden(=128)] enc_outputs, enc_hidden = tf.nn.dynamic_rnn(enc_cell, enc_inputs, dtype=tf.float32) with tf.variable_scope('decode'): dec_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden) dec_cell = tf.nn.rnn_cell.DropoutWrapper(dec_cell, output_keep_prob=0.5) inputs = tf.transpose(dec_inputs, [1, 0, 2]) hidden = enc_hidden for i in range(n_step): # time_major True mean inputs shape: [max_time, batch_size, ...] dec_output, hidden = tf.nn.dynamic_rnn(dec_cell, tf.expand_dims(inputs[i], 1), initial_state=hidden, dtype=tf.float32, time_major=True) attn_weights = get_att_weight(dec_output, enc_outputs) # attn_weights : [1, 1, n_step] Attention.append(tf.squeeze(attn_weights)) # matrix-matrix product of matrices [1, 1, n_step] x [1, n_step, n_hidden] = [1, 1, n_hidden] context = tf.matmul(attn_weights, enc_outputs) dec_output = tf.squeeze(dec_output, 0) # [1, n_step] context = tf.squeeze(context, 1) # [1, n_hidden] model.append(tf.matmul(tf.concat((dec_output, context), 1), out)) # [n_step, batch_size(=1), n_class] trained_attn = tf.stack([Attention[0], Attention[1], Attention[2], Attention[3], Attention[4]], 0) # to show attention matrix model = tf.transpose(model, [1, 0, 2]) # model : [n_step, n_class] prediction = tf.argmax(model, 2) cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model, labels=targets)) optimizer = tf.train.AdamOptimizer(0.001).minimize(cost) # Training and Test with tf.Session() as sess: init = tf.global_variables_initializer() sess.run(init) for epoch in range(2000): input_batch, output_batch, target_batch = make_batch(sentences) _, loss, attention = sess.run([optimizer, cost, trained_attn], feed_dict={enc_inputs: input_batch, dec_inputs: output_batch, targets: target_batch}) if (epoch + 1) % 400 == 0: print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss)) predict_batch = [np.eye(n_class)[[word_dict[n] for n in 'P P P P 
P'.split()]]] result = sess.run(prediction, feed_dict={enc_inputs: input_batch, dec_inputs: predict_batch}) print(sentences[0].split(), '->', [number_dict[n] for n in result[0]]) # Show Attention fig = plt.figure(figsize=(5, 5)) ax = fig.add_subplot(1, 1, 1) ax.matshow(attention, cmap='viridis') ax.set_xticklabels([''] + sentences[0].split(), fontdict={'fontsize': 14}) ax.set_yticklabels([''] + sentences[2].split(), fontdict={'fontsize': 14}) plt.show() ================================================ FILE: archive/tensorflow/v1/4-3.Bi-LSTM(Attention)/Bi-LSTM(Attention).py ================================================ ''' code by Tae Hwan Jung(Jeff Jung) @graykode Reference : https://github.com/prakashpandey9/Text-Classification-Pytorch/blob/master/models/LSTM_Attn.py ''' import tensorflow as tf import matplotlib.pyplot as plt import numpy as np tf.reset_default_graph() # Bi-LSTM(Attention) Parameters embedding_dim = 2 n_hidden = 5 # number of hidden units in one cell n_step = 3 # all sentence is consist of 3 words n_class = 2 # 0 or 1 # 3 words sentences (=sequence_length is 3) sentences = ["i love you", "he loves me", "she likes baseball", "i hate you", "sorry for that", "this is awful"] labels = [1, 1, 1, 0, 0, 0] # 1 is good, 0 is not good. 
# Build the vocabulary: every distinct token gets an integer id.
word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
vocab_size = len(word_dict)

# Each sentence becomes an array of word ids.
input_batch = [np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences]

# One-hot targets, as required by the softmax cross-entropy loss.
target_batch = [np.eye(n_class)[out] for out in labels]

# LSTM Model
X = tf.placeholder(tf.int32, [None, n_step])
Y = tf.placeholder(tf.int32, [None, n_class])
out = tf.Variable(tf.random_normal([n_hidden * 2, n_class]))

embedding = tf.Variable(tf.random_uniform([vocab_size, embedding_dim]))
# Named `embedded` rather than `input` so the builtin is not shadowed.
embedded = tf.nn.embedding_lookup(embedding, X) # [batch_size, len_seq, embedding_dim]

lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(n_hidden)
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(n_hidden)

# output : (fw, bw) pair of [batch_size, len_seq, n_hidden]
# final_state : (fw_state, bw_state), each an LSTMStateTuple(c, h) of [batch_size, n_hidden]
output, final_state = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,lstm_bw_cell, embedded, dtype=tf.float32)

# Attention
output = tf.concat([output[0], output[1]], 2) # output[0] : lstm_fw, output[1] : lstm_bw -> [batch_size, n_step, n_hidden * 2]
# Use the final *hidden* state of each direction as the attention query.
# The previous final_state[1][0] / final_state[1][1] picked the backward
# cell's c and h, so the forward direction was ignored entirely.
final_hidden_state = tf.concat([final_state[0].h, final_state[1].h], 1) # final_hidden_state : [batch_size, n_hidden * num_directions(=2)]
final_hidden_state = tf.expand_dims(final_hidden_state, 2) # final_hidden_state : [batch_size, n_hidden * num_directions(=2), 1]

# Dot-product score of every time step against the final hidden state.
attn_weights = tf.squeeze(tf.matmul(output, final_hidden_state), 2) # attn_weights : [batch_size, n_step]
soft_attn_weights = tf.nn.softmax(attn_weights, 1)
# Attention-weighted sum of the per-step outputs.
context = tf.matmul(tf.transpose(output, [0, 2, 1]), tf.expand_dims(soft_attn_weights, 2)) # context : [batch_size, n_hidden * num_directions(=2), 1]
context = tf.squeeze(context, 2) # [batch_size, n_hidden * num_directions(=2)]

model = tf.matmul(context, out)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

# Model-Predict
hypothesis = tf.nn.softmax(model)
predictions = tf.argmax(hypothesis, 1)

# Training
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for epoch in range(5000):
        _, loss, attention = sess.run([optimizer, cost, soft_attn_weights], feed_dict={X: input_batch, Y: target_batch})
        if (epoch + 1)%1000 == 0:
            print('Epoch:', '%06d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    # Test
    test_text = 'sorry hate you'
    tests = [np.asarray([word_dict[n] for n in test_text.split()])]

    predict = sess.run([predictions], feed_dict={X: tests})
    # predict is a one-element list (one fetch); [0][0] is the class of the single test sentence.
    result = predict[0][0]
    if result == 0:
        print(test_text,"is Bad Mean...")
    else:
        print(test_text,"is Good Mean!!")

    # Plot the attention weights fetched during the last training step.
    fig = plt.figure(figsize=(6, 3)) # [batch_size, n_step]
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(attention, cmap='viridis')
    ax.set_xticklabels([''] + ['first_word', 'second_word', 'third_word'], fontdict={'fontsize': 14}, rotation=90)
    ax.set_yticklabels([''] + ['batch_1', 'batch_2', 'batch_3', 'batch_4', 'batch_5', 'batch_6'], fontdict={'fontsize': 14})
    plt.show()